def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
        archive_items = []
        archive_lookup = {}
        if ids:
            query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # can't access published from elastic due to the filter on the archive resource, hence going to mongo
            archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                 .get_from_mongo(req=archive_req, lookup=query))
            takes_service = TakesPackageService()
            takes_service.enhance_items_with_takes_packages(archive_items)
            for item in archive_items:
                handle_existing_data(item)
                archive_lookup[item[config.ID_FIELD]] = item

        for item in items:
            archive_item = archive_lookup.get(item.get('item_id'), {config.VERSION: item.get(config.VERSION, 1)})

            updates = {
                config.ID_FIELD: item.get('item_id'),
                'item_id': item.get(config.ID_FIELD),
                'lock_user': archive_item.get('lock_user', None),
                'lock_time': archive_item.get('lock_time', None),
                'lock_action': archive_item.get('lock_action', None),
                'lock_session': archive_item.get('lock_session', None),
                'archive_item': archive_item if archive_item else None
            }

            item.update(updates)
            handle_existing_data(item)
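# The archive_lookup dict above replaces a per-item list scan (used by other
# variants of this function below) with an O(1) lookup keyed by item id.
# A minimal, framework-free sketch of the pattern with hypothetical data:
archive_items = [
    {'_id': 'a1', 'lock_user': 'u1'},
    {'_id': 'a2', 'lock_user': None},
]
archive_lookup = {doc['_id']: doc for doc in archive_items}

published = [{'item_id': 'a2'}, {'item_id': 'missing'}]
for item in published:
    # fall back to a bare version stub when the archive copy is gone
    archive_item = archive_lookup.get(item['item_id'], {'_current_version': 1})
    item['lock_user'] = archive_item.get('lock_user')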
def on_update(self, updates, original):
    """Runs on archive update.

    Overridden to validate the updates to the article and take necessary actions depending on the updates.
    In brief, it does the following:
        1. Sets state, item operation, version created, version creator, sign off and word count.
        2. Resets Item Expiry.
        3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also.
        4. Creates Crops if the article is a picture.
    """
    user = get_user()
    self._validate_updates(original, updates, user)

    if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
        # check if there is a takes package and deschedule the takes package.
        takes_service = TakesPackageService()
        package = takes_service.get_take_package(original)
        if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
            get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
            self.delete_by_article_ids([package.get(config.ID_FIELD)])
            updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                           if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
        return

    if self.__is_req_for_save(updates):
        update_state(original, updates)

    remove_unwanted(updates)
    self._add_system_updates(original, updates, user)
    self._add_desk_metadata(updates, original)
    self._handle_media_updates(updates, original, user)
def validate_embargo(self, item):
    """Validates the embargo of the item.

    Following are checked:
        1. Item can't be a package or a take or a re-write of another story.
        2. Publish Schedule and Embargo are mutually exclusive.
        3. Always a future date except in case of Corrected and Killed.

    :raises: SuperdeskApiError.badRequestError() if the validation fails
    """
    if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
        if EMBARGO in item:
            embargo = item.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(EMBARGO))
            if embargo:
                if item.get(PUBLISH_SCHEDULE) or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                    raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo")

                if (item[ITEM_STATE] not in {CONTENT_STATE.KILLED, CONTENT_STATE.SCHEDULED}) \
                        and embargo <= utcnow():
                    raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now")

                package = TakesPackageService().get_take_package(item)
                if package and package.get(SEQUENCE, 1) > 1:
                    raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo")

                if item.get('rewrite_of'):
                    raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo")

                if not isinstance(embargo, datetime.date) or not embargo.time():
                    raise SuperdeskApiError.badRequestError("Invalid Embargo")

    elif is_normal_package(item):
        if item.get(EMBARGO):
            raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo")

        self.packageService.check_if_any_item_in_package_has_embargo(item)
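# For illustration only: a self-contained sketch of the date rules the
# validator above enforces. check_embargo and its arguments are hypothetical
# stand-ins, not part of the service API.
from datetime import datetime, timedelta, timezone


def check_embargo(embargo, state, publish_schedule=None, rewrite_of=None):
    """Simplified embargo rules: exclusive with scheduling, future-only, no rewrites."""
    if publish_schedule or state == 'scheduled':
        raise ValueError("An item can't have both Publish Schedule and Embargo")
    if state not in {'killed', 'scheduled'} and embargo <= datetime.now(timezone.utc):
        raise ValueError("Embargo cannot be earlier than now")
    if rewrite_of:
        raise ValueError("Rewrites doesn't support Embargo")


# a future embargo on an in-progress story passes
check_embargo(datetime.now(timezone.utc) + timedelta(hours=2), 'in_progress')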
def on_update(self, updates, original):
    """
    Overridden to validate the updates to the article and take necessary actions depending on the updates.
    In brief, it does the following:
        1. Sets state, item operation, version created, version creator, sign off and word count.
        2. Resets Item Expiry.
        3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also.
        4. Creates Crops if the article is a picture.
    """
    user = get_user()
    self._validate_updates(original, updates, user)

    if 'publish_schedule' in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
        # this is a deschedule action
        self.deschedule_item(updates, original)
        # check if there is a takes package and deschedule the takes package.
        package = TakesPackageService().get_take_package(original)
        if package and package.get('state') == 'scheduled':
            package_updates = {'publish_schedule': None, 'groups': package.get('groups')}
            self.patch(package.get(config.ID_FIELD), package_updates)
        return

    if self.__is_req_for_save(updates):
        update_state(original, updates)

    remove_unwanted(updates)
    self._add_system_updates(original, updates, user)

    if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
        CropService().create_multiple_crops(updates, original)
def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
        archive_items = []
        if ids:
            query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # can't access published from elastic due to the filter on the archive resource, hence going to mongo
            archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                 .get_from_mongo(req=archive_req, lookup=query))
            takes_service = TakesPackageService()
            for item in archive_items:
                handle_existing_data(item)
                takes_service.enhance_with_package_info(item)

        for item in items:
            archive_item = [i for i in archive_items if i.get(config.ID_FIELD) == item.get('item_id')]
            archive_item = archive_item[0] if len(archive_item) > 0 else \
                {config.VERSION: item.get(config.VERSION, 1)}

            updates = {
                config.ID_FIELD: item.get('item_id'),
                'item_id': item.get(config.ID_FIELD),
                'lock_user': archive_item.get('lock_user', None),
                'lock_time': archive_item.get('lock_time', None),
                'lock_session': archive_item.get('lock_session', None),
                'archive_item': archive_item if archive_item else None
            }

            item.update(updates)
            handle_existing_data(item)
def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get("item_id") for item in items if item.get("item_id")]))
        archive_items = []
        archive_lookup = {}
        if ids:
            query = {"$and": [{config.ID_FIELD: {"$in": ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # can't access published from elastic due to the filter on the archive resource, hence going to mongo
            archive_items = list(
                superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=archive_req, lookup=query)
            )
            takes_service = TakesPackageService()
            takes_service.enhance_items_with_takes_packages(archive_items)
            for item in archive_items:
                handle_existing_data(item)
                archive_lookup[item[config.ID_FIELD]] = item

        for item in items:
            archive_item = archive_lookup.get(item.get("item_id"), {config.VERSION: item.get(config.VERSION, 1)})

            updates = {
                config.ID_FIELD: item.get("item_id"),
                "item_id": item.get(config.ID_FIELD),
                "lock_user": archive_item.get("lock_user", None),
                "lock_time": archive_item.get("lock_time", None),
                "lock_session": archive_item.get("lock_session", None),
                "archive_item": archive_item if archive_item else None,
            }

            item.update(updates)
            handle_existing_data(item)
def on_update(self, updates, original):
    """
    Overridden to validate the updates to the article and take necessary actions depending on the updates.
    In brief, it does the following:
        1. Sets state, item operation, version created, version creator, sign off and word count.
        2. Resets Item Expiry.
        3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also.
        4. Creates Crops if the article is a picture.
    """
    user = get_user()
    self._validate_updates(original, updates, user)

    if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
        # this is a deschedule action
        self.deschedule_item(updates, original)
        # check if there is a takes package and deschedule the takes package.
        takes_service = TakesPackageService()
        package = takes_service.get_take_package(original)
        if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
            get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
            self.delete_by_article_ids([package.get(config.ID_FIELD)])
            updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                           if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
        return

    if self.__is_req_for_save(updates):
        update_state(original, updates)

    remove_unwanted(updates)
    self._add_system_updates(original, updates, user)
    self._add_desk_metadata(updates, original)

    if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
        CropService().create_multiple_crops(updates, original)

    updates_feature_image = updates.get('associations', {}).get('featureimage')
    if updates_feature_image and 'poi' in updates_feature_image:
        original_feature_image = original.get('associations', {}).get('featureimage', {})
        if original_feature_image and original_feature_image.get('poi', {}) == updates_feature_image['poi']:
            return
        _id = updates_feature_image[config.ID_FIELD] if config.ID_FIELD in updates_feature_image \
            else original_feature_image[config.ID_FIELD]
        image_item = self.find_one(req=None, _id=_id)
        if image_item:
            image_item['poi'] = updates_feature_image['poi']
            image_item = self.patch(_id, image_item)
            updates['associations']['featureimage']['renditions'] = image_item['renditions']
def _update_rewrite(self, original):
    """Removes the reference from the rewritten story in the published collection."""
    rewrite_service = ArchiveRewriteService()
    if original.get('rewrite_of') and original.get('event_id'):
        rewrite_service._clear_rewritten_flag(original.get('event_id'),
                                              original[config.ID_FIELD], 'rewritten_by')

    # write the rewritten_by to the take before it is spiked
    archive_service = get_resource_service(ARCHIVE)
    published_service = get_resource_service('published')
    takes_service = TakesPackageService()
    takes_package = takes_service.get_take_package(original)

    if takes_package and takes_package.get(SEQUENCE, 0) > 1 and original.get('rewritten_by'):
        # get the rewritten by
        rewritten_by = archive_service.find_one(req=None, _id=original.get('rewritten_by'))
        # get the take
        take_id = takes_service.get_take_by_take_no(original,
                                                    take_no=takes_package.get(SEQUENCE) - 1,
                                                    package=takes_package)
        take = archive_service.find_one(req=None, _id=take_id)

        # update the take and takes package with rewritten_by
        if take.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
            if take.get(ITEM_STATE) in PUBLISH_STATES:
                published_service.update_published_items(take_id, 'rewritten_by',
                                                         rewritten_by[config.ID_FIELD])
            archive_service.system_update(take[config.ID_FIELD],
                                          {'rewritten_by': rewritten_by[config.ID_FIELD]}, take)

        if takes_package.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
            if takes_package.get(ITEM_STATE) in PUBLISH_STATES:
                published_service.update_published_items(takes_package.get(config.ID_FIELD),
                                                         'rewritten_by', rewritten_by[config.ID_FIELD])
            archive_service.system_update(takes_package[config.ID_FIELD],
                                          {'rewritten_by': rewritten_by[config.ID_FIELD]}, takes_package)

        if rewritten_by.get('rewrite_of') != takes_package.get(config.ID_FIELD):
            archive_service.system_update(rewritten_by[config.ID_FIELD],
                                          {'rewrite_of': takes_package.get(config.ID_FIELD)}, rewritten_by)
def on_update(self, updates, original):
    """Runs on archive update.

    Overridden to validate the updates to the article and take necessary actions depending on the updates.
    In brief, it does the following:
        1. Sets state, item operation, version created, version creator, sign off and word count.
        2. Resets Item Expiry.
        3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also.
        4. Creates Crops if the article is a picture.
    """
    user = get_user()
    self._validate_updates(original, updates, user)

    if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
        # check if there is a takes package and deschedule the takes package.
        takes_service = TakesPackageService()
        package = takes_service.get_take_package(original)
        if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
            get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD))
            self.delete_by_article_ids([package.get(config.ID_FIELD)])
            updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, [])
                                           if package.get(PACKAGE_TYPE) != TAKES_PACKAGE]
        return

    if self.__is_req_for_save(updates):
        update_state(original, updates)

    remove_unwanted(updates)
    self._add_system_updates(original, updates, user)
    self._add_desk_metadata(updates, original)

    if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
        CropService().create_multiple_crops(updates, original)

    # iterate over associations; validate and process them if they are stored in the database
    if 'associations' in updates:
        for item_name, item_obj in updates.get('associations').items():
            if item_obj and config.ID_FIELD in item_obj:
                _id = item_obj[config.ID_FIELD]
                stored_item = self.find_one(req=None, _id=_id)
                if stored_item:
                    self._validate_updates(stored_item, item_obj, user)
                    if stored_item[ITEM_TYPE] == CONTENT_TYPE.PICTURE:  # create crops
                        CropService().create_multiple_crops(item_obj, stored_item)
                    stored_item.update(item_obj)
                    updates['associations'][item_name] = stored_item
def set_usn(self, odbc_item, article):
    """Set the usn (unique story number) in the odbc item.

    :param odbc_item:
    :param article:
    :return:
    """
    takes_package_service = TakesPackageService()
    pkg = takes_package_service.get_take_package(article)
    if pkg is not None:
        odbc_item['usn'] = pkg.get('unique_id', None)  # @usn
    else:
        odbc_item['usn'] = article.get('unique_id', None)  # @usn
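# In effect the usn falls back from the takes package to the article itself.
# A tiny sketch with hypothetical ids:
pkg = {'unique_id': 40001}       # what get_take_package(article) might return
article = {'unique_id': 51234}

odbc_item = {}
odbc_item['usn'] = (pkg if pkg is not None else article).get('unique_id')
assert odbc_item['usn'] == 40001  # the package's unique_id wins when the article is a take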
def _update_rewrite(self, original):
    """Removes the reference from the rewritten story in the published collection."""
    rewrite_service = ArchiveRewriteService()
    if original.get('rewrite_of') and original.get('event_id'):
        rewrite_service._clear_rewritten_flag(original.get('event_id'),
                                              original[config.ID_FIELD], 'rewritten_by')

    # write the rewritten_by to the take before it is spiked
    archive_service = get_resource_service(ARCHIVE)
    published_service = get_resource_service('published')
    takes_service = TakesPackageService()
    takes_package = takes_service.get_take_package(original)

    if takes_package and takes_package.get(SEQUENCE, 0) > 1 and original.get('rewritten_by'):
        # get the rewritten by
        rewritten_by = archive_service.find_one(req=None, _id=original.get('rewritten_by'))
        # get the take
        take_id = takes_service.get_take_by_take_no(original,
                                                    take_no=takes_package.get(SEQUENCE) - 1,
                                                    package=takes_package)
        take = archive_service.find_one(req=None, _id=take_id)

        # update the take and takes package with rewritten_by
        if take.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
            if take.get(ITEM_STATE) in PUBLISH_STATES:
                published_service.update_published_items(take_id, 'rewritten_by',
                                                         rewritten_by[config.ID_FIELD])
            archive_service.system_update(take[config.ID_FIELD],
                                          {'rewritten_by': rewritten_by[config.ID_FIELD]}, take)

        if takes_package.get('rewritten_by') != rewritten_by[config.ID_FIELD]:
            if takes_package.get(ITEM_STATE) in PUBLISH_STATES:
                published_service.update_published_items(takes_package.get(config.ID_FIELD),
                                                         'rewritten_by', rewritten_by[config.ID_FIELD])
            archive_service.system_update(takes_package[config.ID_FIELD],
                                          {'rewritten_by': rewritten_by[config.ID_FIELD]}, takes_package)

        if rewritten_by.get('rewrite_of') != takes_package.get(config.ID_FIELD):
            archive_service.system_update(rewritten_by[config.ID_FIELD],
                                          {'rewrite_of': takes_package.get(config.ID_FIELD)}, rewritten_by)

    elif original.get('rewritten_by') or (takes_package and takes_package.get('rewritten_by')):
        # we are spiking the story from which the rewrite was triggered.
        # in this case both rewrite_of and rewritten_by are published.
        rewrite_id = original.get('rewritten_by') or takes_package.get('rewritten_by')
        rewritten_by = archive_service.find_one(req=None, _id=rewrite_id)
        archive_service.system_update(rewrite_id, {'rewrite_of': None, 'rewrite_sequence': 0}, rewritten_by)
def validate_embargo(self, item):
    """Validates the embargo of the item.

    Following are checked:
        1. Item can't be a package or a take or a re-write of another story.
        2. Publish Schedule and Embargo are mutually exclusive.
        3. Always a future date except in case of Corrected and Killed.

    :raises: SuperdeskApiError.badRequestError() if the validation fails
    """
    if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE:
        embargo = item.get(EMBARGO)
        if embargo:
            if item.get('publish_schedule') or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED:
                raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo")

            package = TakesPackageService().get_take_package(item)
            if package:
                raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo")

            if item.get('rewrite_of'):
                raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo")

            if not isinstance(embargo, datetime.date) or not embargo.time():
                raise SuperdeskApiError.badRequestError("Invalid Embargo")

            if item[ITEM_STATE] not in PUBLISH_STATES and embargo <= utcnow():
                raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now")

    elif is_normal_package(item):
        if item.get(EMBARGO):
            raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo")

        self.packageService.check_if_any_item_in_package_has_embargo(item)
def _validate_unlink(self, target):
    """Validates that the links for takes or updates can be removed.

    :param target: article whose links will be removed
    :raises: SuperdeskApiError
    """
    if target[ITEM_TYPE] != CONTENT_TYPE.TEXT:
        raise SuperdeskApiError.badRequestError("Only text stories can be unlinked!")

    # if the story is in a published state then it cannot be unlinked
    if target[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]:
        raise SuperdeskApiError.badRequestError("Published stories cannot be unlinked!")

    # if the story is not the last take then it cannot be unlinked
    if TakesPackageService().get_take_package(target) and \
            not TakesPackageService().is_last_takes_package_item(target):
        raise SuperdeskApiError.badRequestError("Only the last take can be unlinked!")
class ArchiveLinkService(Service):
    packageService = TakesPackageService()

    def create(self, docs, **kwargs):
        target_id = request.view_args['target_id']
        doc = docs[0]
        link_id = doc.get('link_id')
        desk_id = doc.get('desk')
        service = get_resource_service(ARCHIVE)
        target = service.find_one(req=None, _id=target_id)
        self._validate_link(target, target_id)
        link = {}

        if desk_id:
            link = {'task': {'desk': desk_id}}
            user = get_user()
            lookup = {'_id': desk_id, 'members.user': user['_id']}
            desk = get_resource_service('desks').find_one(req=None, **lookup)
            if not desk:
                raise SuperdeskApiError.forbiddenError("No privileges to create new take on requested desk.")

            link['task']['stage'] = desk['working_stage']

        if link_id:
            link = service.find_one(req=None, _id=link_id)

        linked_item = self.packageService.link_as_next_take(target, link)
        insert_into_versions(id_=linked_item[config.ID_FIELD])
        doc.update(linked_item)
        build_custom_hateoas(CUSTOM_HATEOAS, doc)
        return [linked_item['_id']]

    def _validate_link(self, target, target_id):
        """Validates the article to be linked.

        :param target: article to be linked
        :param target_id: id of the article to be linked
        :raises: SuperdeskApiError
        """
        if not target:
            raise SuperdeskApiError.notFoundError(
                message='Cannot find the target item with id {}.'.format(target_id))

        if target.get(EMBARGO):
            raise SuperdeskApiError.badRequestError("Takes can't be created for an Item having Embargo")

        if is_genre(target, BROADCAST_GENRE):
            raise SuperdeskApiError.badRequestError("Cannot add new take to the story with genre as broadcast.")

        if get_resource_service('published').is_rewritten_before(target['_id']):
            raise SuperdeskApiError.badRequestError(message='Article has been rewritten before !')
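# For context, a sketch of the payload create() expects. The field values are
# hypothetical; the target item id arrives through the route
# (request.view_args['target_id']), not through the body.
docs = [{
    'desk': '54e68fcd1d41c8a35b7a2c11',  # hypothetical desk id: create the new take on this desk
    # 'link_id': '<existing-item-id>',   # alternatively, link an existing item as the next take
}]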
def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get("item_id") for item in items if item.get("item_id")]))
        archive_items = []
        if ids:
            query = {"$and": [{config.ID_FIELD: {"$in": ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # can't access published from elastic due to the filter on the archive resource, hence going to mongo
            archive_items = list(
                superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=archive_req, lookup=query)
            )
            takes_service = TakesPackageService()
            for item in archive_items:
                handle_existing_data(item)
                takes_service.enhance_with_package_info(item)

        for item in items:
            try:
                archive_item = [i for i in archive_items if i.get(config.ID_FIELD) == item.get("item_id")][0]
            except IndexError:
                logger.exception(
                    (
                        "Data inconsistency found for the published item {}. "
                        "Cannot find item {} in the archive collection."
                    ).format(item.get(config.ID_FIELD), item.get("item_id"))
                )
                archive_item = {}

            updates = {
                config.ID_FIELD: item.get("item_id"),
                "item_id": item.get(config.ID_FIELD),
                "lock_user": archive_item.get("lock_user", None),
                "lock_time": archive_item.get("lock_time", None),
                "lock_session": archive_item.get("lock_session", None),
                "archive_item": archive_item if archive_item else None,
            }

            item.update(updates)
            handle_existing_data(item)
def delete(self, lookup):
    target_id = request.view_args['target_id']
    archive_service = get_resource_service(ARCHIVE)
    target = archive_service.find_one(req=None, _id=target_id)
    self._validate_unlink(target)
    updates = {}

    takes_package = TakesPackageService().get_take_package(target)
    if takes_package and TakesPackageService().is_last_takes_package_item(target):
        # remove the take link
        PackageService().remove_refs_in_package(takes_package, target_id)

    if target.get('rewrite_of'):
        # remove the rewrite info
        ArchiveSpikeService().update_rewrite(target)

    if not takes_package and not target.get('rewrite_of'):
        # there is nothing to do
        raise SuperdeskApiError.badRequestError("Only takes and updates can be unlinked!")

    if target.get('rewrite_of'):
        updates['rewrite_of'] = None

    if target.get('anpa_take_key'):
        updates['anpa_take_key'] = None

    if target.get('rewrite_sequence'):
        updates['rewrite_sequence'] = None

    if target.get('sequence'):
        updates['sequence'] = None

    updates['event_id'] = generate_guid(type=GUID_TAG)

    archive_service.system_update(target_id, updates, target)
    user = get_user(required=True)
    push_notification('item:unlink', item=target_id, user=str(user.get(config.ID_FIELD)))
    app.on_archive_item_updated(updates, target, ITEM_UNLINK)
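# The unlink above boils down to clearing whichever linkage fields are present
# and rotating the event id. A simplified sketch with a hypothetical target:
target = {'rewrite_of': 'story-1', 'anpa_take_key': 'update', 'rewrite_sequence': 2, 'sequence': 3}

updates = {field: None for field in ('rewrite_of', 'anpa_take_key', 'rewrite_sequence', 'sequence')
           if target.get(field)}
updates['event_id'] = 'new-guid'  # the real service calls generate_guid(type=GUID_TAG)
assert set(updates) == {'rewrite_of', 'anpa_take_key', 'rewrite_sequence', 'sequence', 'event_id'}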
def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
        archive_items = []
        if ids:
            query = {'$and': [{'_id': {'$in': ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # can't access published from elastic due to the filter on the archive resource, hence going to mongo
            archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                 .get_from_mongo(req=archive_req, lookup=query))
            takes_service = TakesPackageService()
            for item in archive_items:
                handle_existing_data(item)
                takes_service.enhance_with_package_info(item)

        for item in items:
            try:
                archive_item = [i for i in archive_items if i.get('_id') == item.get('item_id')][0]
            except IndexError:
                logger.exception(('Data inconsistency found for the published item {}. '
                                  'Cannot find item {} in the archive collection.')
                                 .format(item.get('_id'), item.get('item_id')))
                archive_item = {}

            updates = {
                '_id': item.get('item_id'),
                'item_id': item.get('_id'),
                'lock_user': archive_item.get('lock_user', None),
                'lock_time': archive_item.get('lock_time', None),
                'lock_session': archive_item.get('lock_session', None),
                'archive_item': archive_item if archive_item else None
            }

            item.update(updates)
            handle_existing_data(item)
class BasePublishService(BaseService):
    """Base service class for the "publish" endpoint."""

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in
                                         {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def on_update(self, updates, original):
        self._validate(original, updates)
        self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow()))
        convert_task_attributes_to_objectId(updates)
        self._process_publish_updates(original, updates)

    def on_updated(self, updates, original):
        original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        updates.update(original)

        if updates[ITEM_OPERATION] != ITEM_KILL and \
                original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original)

        get_resource_service('archive_broadcast').reset_broadcast_status(updates, original)
        push_content_notification([updates])
        self._import_into_legal_archive(updates)

    def update(self, id, updates, original):
        """Handles the workflow for each of Publish, Correct and Kill."""
        try:
            user = get_user()
            auto_publish = updates.pop('auto_publish', False)

            if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                self._publish_package_items(original, updates)
                self._update_archive(original, updates, should_insert_into_versions=auto_publish)
            else:
                self._publish_associations(original, id)
                updated = deepcopy(original)
                updated.update(updates)

                if self.published_state != CONTENT_STATE.KILLED:
                    self._process_takes_package(original, updated, updates)

                self._update_archive(original, updated, should_insert_into_versions=auto_publish)
                self.update_published_collection(published_item_id=original[config.ID_FIELD], updated=updated)

            from apps.publish.enqueue import enqueue_published
            enqueue_published.apply_async()

            push_notification('item:publish', item=str(id),
                              unique_name=original['unique_name'],
                              desk=str(original.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')))
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e)))
        except Exception as e:
            logger.exception("Something bad happened while publishing {}".format(id))
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e)))

    def _process_takes_package(self, original, updated, updates):
        # if targeted_for is set then the item is not sent to digital clients.
        targeted_for = updates.get('targeted_for', original.get('targeted_for'))
        if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} \
                and not (targeted_for or is_genre(original, BROADCAST_GENRE)):
            # check if the item is in a digital package
            last_updated = updates.get(config.LAST_UPDATED, utcnow())
            package = self.takes_package_service.get_take_package(original)
            if not package:
                '''
                If the type of the item is text or preformatted then the item needs to be
                sent to digital subscribers, so package the item as a take.
                '''
                package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None)
                package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id)
            package_id = package[config.ID_FIELD]

            package_updates = self.process_takes(updates_of_take_to_be_published=updates,
                                                 original_of_take_to_be_published=original,
                                                 package=package)

            # If the original package is corrected then the next take shouldn't change it
            # back to 'published'
            preserve_state = package.get(ITEM_STATE, '') == CONTENT_STATE.CORRECTED and \
                updates.get(ITEM_OPERATION, ITEM_PUBLISH) == ITEM_PUBLISH
            self._set_updates(package, package_updates, last_updated, preserve_state)
            package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH))
            self._update_archive(package, package_updates)
            package.update(package_updates)
            self.update_published_collection(published_item_id=package_id)
            self._import_into_legal_archive(package)

    def _validate(self, original, updates):
        self.raise_if_not_marked_for_publication(original)
        self.raise_if_invalid_state_transition(original)

        updated = original.copy()
        updated.update(updates)

        takes_package = self.takes_package_service.get_take_package(original)

        if self.publish_type == 'publish':
            # validate if the take can be published
            if takes_package and not self.takes_package_service.can_publish_take(
                    takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))):
                raise PublishQueueError.previous_take_not_published_error(
                    Exception("Previous takes are not published."))

            validate_schedule(updated.get(PUBLISH_SCHEDULE), takes_package.get(SEQUENCE, 1) if takes_package else 1)
            update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE))

        if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO):
            get_resource_service(ARCHIVE).validate_embargo(updated)

        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            if updates.get(EMBARGO):
                raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing")

            if updates.get('dateline'):
                raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing")

        if self.publish_type == ITEM_PUBLISH and updated.get('rewritten_by'):
            # if the update is published then the user cannot publish the takes
            rewritten_by = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get('rewritten_by'))
            if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES:
                raise SuperdeskApiError.badRequestError("Cannot publish the story after Update is published!")

        validate_item = {'act': self.publish_type, 'type': original['type'], 'validate': updated}
        validation_errors = get_resource_service('validate').post([validate_item])
        if validation_errors[0]:
            raise ValidationError(validation_errors)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            package_validation_errors = []
            self._validate_package_contents(original, takes_package, package_validation_errors)
            if len(package_validation_errors) > 0:
                raise ValidationError(package_validation_errors)

            self._validate_package(original, updates)

    def _validate_package(self, package, updates):
        items = self.package_service.get_residrefs(package)
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")

    def raise_if_not_marked_for_publication(self, original):
        if original.get('flags', {}).get('marked_for_not_publication', False):
            raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication')

    def raise_if_invalid_state_transition(self, original):
        if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]):
            error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \
                "Can't {} as either package state or one of the items state is {}"
            raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE]))

    def get_digital_id_for_package_item(self, package_item):
        """Finds the digital item id for a given item in a package.

        :param package_item: item in a package
        :return string: Digital item id if there's one, otherwise the id of package_item
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return package_item[config.ID_FIELD]
        else:
            package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item)
            if not package_item_takes_package_id:
                return package_item[config.ID_FIELD]
            return package_item_takes_package_id

    def _process_publish_updates(self, original, updates):
        """Common updates for published items."""
        desk = None
        if original.get('task', {}).get('desk'):
            desk = get_resource_service('desks').find_one(req=None, _id=original['task']['desk'])
        if not original.get('ingest_provider'):
            updates['source'] = desk['source'] if desk and desk.get('source', '') \
                else app.settings['DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES']
        updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE
        self._set_item_expiry(updates, original)

    def _set_item_expiry(self, updates, original):
        """Set the expiry for the item.

        :param dict updates: doc on which publishing action is performed
        """
        desk_id = original.get('task', {}).get('desk')
        stage_id = original.get('task', {}).get('stage')
        offset = None  # without embargo or publish schedule, expiry is based on desk/stage settings alone

        if EMBARGO in updates or PUBLISH_SCHEDULE in updates:
            offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO)
        elif EMBARGO in original or PUBLISH_SCHEDULE in original:
            offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO)

        updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset)

    def _is_take_item(self, item):
        """Returns True if the item was a take."""
        return item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and \
            (not (item.get('targeted_for') or is_genre(item, BROADCAST_GENRE)))

    def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None):
        """Process takes for publishing.

        The primary rule for publishing a Take in a Takes Package is: all previous takes must be
        published before a take can be published. Also generates the body_html of the takes
        package and makes sure the metadata for the package is the same as the metadata of the
        take to be published.

        :param dict updates_of_take_to_be_published: updates for the take to be published
        :param dict package: Takes package to publish
        :param dict original_of_take_to_be_published: original of the take to be published
        :return: Takes Package Updates
        """
        takes = self.takes_package_service.get_published_takes(package)
        body_html = updates_of_take_to_be_published.get('body_html',
                                                        original_of_take_to_be_published.get('body_html', ''))
        package_updates = {}

        groups = package.get(GROUPS, [])
        if groups:
            take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs')]
            sequence_num_of_take_to_be_published = 0

            take_article_id = updates_of_take_to_be_published.get(
                config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD])

            for r in take_refs:
                if r[GUID_FIELD] == take_article_id:
                    sequence_num_of_take_to_be_published = r[SEQUENCE]
                    r['is_published'] = True
                    break

            if takes and self.published_state != 'killed':
                body_html_list = [take.get('body_html', '') for take in takes]
                if self.published_state == 'published':
                    body_html_list.append(body_html)
                else:
                    body_html_list[sequence_num_of_take_to_be_published - 1] = body_html

                package_updates['body_html'] = '<br>'.join(body_html_list)
            else:
                package_updates['body_html'] = body_html

            metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy()
            metadata_tobe_copied.extend([PUBLISH_SCHEDULE, SCHEDULE_SETTINGS, 'byline'])
            updated_take = original_of_take_to_be_published.copy()
            updated_take.update(updates_of_take_to_be_published)
            metadata_from = updated_take
            # this rule has changed to use the last published metadata
            # per ticket SD-3885
            # if self.published_state == 'corrected' and len(takes) > 1:
            #     # get the last take metadata only if there are more than one takes
            #     metadata_from = takes[-1]

            for metadata in metadata_tobe_copied:
                if metadata in metadata_from:
                    package_updates[metadata] = metadata_from.get(metadata)

            if self.published_state == 'killed':
                # if published then update the groups in the take
                # to reflect the correct version, headline and slugline
                archive_service = get_resource_service(ARCHIVE)
                for ref in take_refs:
                    if ref.get(RESIDREF) != take_article_id:
                        archive_item = archive_service.find_one(req=None, _id=ref.get(RESIDREF))
                        ref['headline'] = archive_item.get('headline')
                        ref['slugline'] = archive_item.get('slugline')
                        ref[config.VERSION] = archive_item.get(config.VERSION)

            take_ref = next((ref for ref in take_refs if ref.get(RESIDREF) == take_article_id), None)
            if take_ref:
                # for the published take update the version, headline and slugline
                take_ref['headline'] = updated_take.get('headline')
                take_ref['slugline'] = updated_take.get('slugline')
                take_ref[config.VERSION] = updated_take.get(config.VERSION)

            package_updates[GROUPS] = groups

        return package_updates

    def _publish_package_items(self, package, updates):
        """Publishes all items of a package recursively then publishes the package itself.

        :param package: package to publish
        :param updates: payload
        """
        items = self.package_service.get_residrefs(package)

        if len(items) == 0 and self.publish_type == ITEM_PUBLISH:
            raise SuperdeskApiError.badRequestError("Empty package cannot be published!")

        removed_items = []
        if self.publish_type in [ITEM_CORRECT, ITEM_KILL]:
            removed_items, added_items = self._get_changed_items(items, updates)
            # we raise an error if correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT:
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        if items:
            archive_publish = get_resource_service('archive_publish')
            for guid in items:
                package_item = super().find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} does not exist.".format(guid))

                if package_item[ITEM_STATE] not in PUBLISH_STATES:  # if the item is not published then publish it
                    if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        # if the item is a package, recurse to publish it
                        sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates}
                        sub_updates['groups'] = list(package_item['groups'])
                        self._publish_package_items(package_item, sub_updates)
                        self._update_archive(original=package_item, updates=sub_updates,
                                             should_insert_into_versions=False)
                    else:
                        # publish the item
                        package_item[PUBLISHED_IN_PACKAGE] = package[config.ID_FIELD]
                        archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item)

                    insert_into_versions(id_=guid)

                elif guid in removed_items:
                    # remove the package information from the package item.
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES)
                                          if linked.get(PACKAGE) != package.get(config.ID_FIELD)]
                    super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

                package_item = super().find_one(req=None, _id=guid)
                self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD],
                                                             config.VERSION, package_item[config.VERSION])

        updated = deepcopy(package)
        updated.update(updates)
        self.update_published_collection(published_item_id=package[config.ID_FIELD], updated=updated)

    def update_published_collection(self, published_item_id, updated=None):
        """Updates the published collection with the published item.

        Sets last_published_version to False for previous versions of the published items.

        :param: str published_item_id: _id of the document.
        """
        published_item = super().find_one(req=None, _id=published_item_id)
        published_item = copy(published_item)
        if updated:
            published_item.update(updated)
        published_item['is_take_item'] = self.takes_package_service.get_take_package_id(published_item) is not None
        if not published_item.get('digital_item_id'):
            published_item['digital_item_id'] = self.get_digital_id_for_package_item(published_item)
        get_resource_service(PUBLISHED).update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False)
        return get_resource_service(PUBLISHED).post([published_item])

    def set_state(self, original, updates):
        """Set the state of the document based on the action (publish, correction, kill).

        :param dict original: original document
        :param dict updates: updates related to document
        """
        updates[PUBLISH_SCHEDULE] = None
        updates[SCHEDULE_SETTINGS] = {}
        updates[ITEM_STATE] = self.published_state

    def _set_updates(self, original, updates, last_updated, preserve_state=False):
        """Sets config.VERSION, config.LAST_UPDATED and ITEM_STATE in the updates document.

        If the item is being published and an embargo is available then append an Editorial Note
        with 'Embargoed'.

        :param dict original: original document
        :param dict updates: updates related to the original document
        :param datetime last_updated: datetime of the updates.
        """
        if not preserve_state:
            self.set_state(original, updates)
        updates.setdefault(config.LAST_UPDATED, last_updated)

        if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]):
            resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original)

        if updates.get(EMBARGO, original.get(EMBARGO)) \
                and updates.get('ednote', original.get('ednote', '')).find('Embargo') == -1:
            updates['ednote'] = '{} {}'.format(original.get('ednote', ''), 'Embargoed.').strip()

        user = get_user()
        if user and user.get(config.ID_FIELD):
            updates['version_creator'] = user[config.ID_FIELD]

    def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True):
        """Updates the article in the archive collection and inserts the latest into archive_versions.

        Also clears autosaved versions if any.

        :param: versioned_doc: doc which can be inserted into archive_versions
        :param: should_insert_into_versions: if True inserts the latest document into the versions collection
        """
        self.backend.update(self.datasource, original[config.ID_FIELD], updates, original)

        if should_insert_into_versions:
            if versioned_doc is None:
                insert_into_versions(id_=original[config.ID_FIELD])
            else:
                insert_into_versions(doc=versioned_doc)

        get_component(ItemAutosave).clear(original[config.ID_FIELD])

    def _get_changed_items(self, existing_items, updates):
        """Returns the added and removed items from existing_items.

        :param existing_items: Existing list
        :param updates: Changes
        :return: list of removed items and list of added items
        """
        if 'groups' in updates:
            new_items = self.package_service.get_residrefs(updates)
            removed_items = list(set(existing_items) - set(new_items))
            added_items = list(set(new_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

    def _validate_package_contents(self, package, takes_package, validation_errors=[]):
        """Validates the contents of a package.

        If the item passed is a package, this function ensures that the unpublished content
        validates and that none of the content is locked by a session other than the publishing
        session. It also disallows any killed or spiked content.

        :param package:
        :param takes_package:
        :param validation_errors: validation errors are appended if there are any.
        """
        # Ensure it is the sort of thing we need to validate
        if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not takes_package and self.publish_type == ITEM_PUBLISH:
            items = self.package_service.get_residrefs(package)

            # make sure the package is not scheduled or spiked
            if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE]))

            if package.get(EMBARGO):
                validation_errors.append('Package cannot have Embargo')

            if items:
                for guid in items:
                    doc = super().find_one(req=None, _id=guid)

                    if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                        digital = self.takes_package_service.get_take_package(doc) or {}
                        self._validate_package_contents(doc, digital, validation_errors)

                    # make sure no items are killed or spiked or scheduled
                    if doc[ITEM_STATE] in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED):
                        validation_errors.append('Package cannot contain {} item'.format(doc[ITEM_STATE]))

                    if doc.get(EMBARGO):
                        validation_errors.append('Package cannot have Items with Embargo')

                    # don't validate items that have already been published
                    if doc[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
                        validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
                        errors = get_resource_service('validate').post([validate_item], headline=True)
                        if errors[0]:
                            validation_errors.extend(errors[0])

                    # check the locks on the items
                    if doc.get('lock_session', None) and package['lock_session'] != doc['lock_session']:
                        validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])])

    def _import_into_legal_archive(self, doc):
        """Import into the legal archive asynchronously.

        :param {dict} doc: document to be imported
        """
        if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED:
            kwargs = {'item_id': doc.get(config.ID_FIELD)}
            # countdown=3 gives elasticsearch time to refresh with the archive and published changes
            import_into_legal_archive.apply_async(countdown=3, kwargs=kwargs)  # @UndefinedVariable

    def _publish_associations(self, parent, guid):
        """Publish parent item associations."""
        associations = parent.get('associations', {})
        for rel, item in associations.copy().items():
            if item.get('pubstatus', 'usable') != 'usable':
                associations.pop(rel)
                continue
            self._publish_renditions(item, rel, guid)

    def _publish_renditions(self, item, rel, guid):
        """Publish item renditions."""
        images = []
        renditions = item.get('renditions', {})
        original = renditions.get('original')
        crop_service = CropService()
        for rendition_name, rendition in renditions.items():
            crop = get_crop(rendition)
            rend_spec = crop_service.get_crop_by_name(rendition_name)
            if crop and rend_spec:
                file_name = '%s/%s/%s' % (guid, rel, rendition_name)
                rendition['media'] = app.media.media_id(file_name, original.get('mimetype'))
                rendition['href'] = app.media.url_for_media(rendition['media'], original.get('mimetype'))
                rendition['width'] = rend_spec.get('width')
                rendition['height'] = rend_spec.get('height')
                rendition['ratio'] = rend_spec.get('ratio')
                rendition['mimetype'] = original.get('mimetype')
                images.append({
                    'rendition': rendition_name,
                    'file_name': file_name,
                    'media': rendition['media'],
                    'spec': rend_spec,
                    'crop': crop,
                })
        publish_images.delay(images=images, original=original, item=item)
class EnqueueService:
    """Creates the corresponding entries in the publish queue for items marked for publishing."""

    publish_type = 'publish'
    published_state = 'published'

    non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE)
    digital = partial(filter, lambda s: (s.get('subscriber_type', '') in
                                         {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL}))
    takes_package_service = TakesPackageService()
    package_service = PackageService()

    def _enqueue_item(self, item):
        if item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and item.get(PACKAGE_TYPE):
            return self.publish(doc=item, target_media_type=SUBSCRIBER_TYPES.DIGITAL)
        elif item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            return self._publish_package_items(item)
        elif item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            return self.publish(item, SUBSCRIBER_TYPES.DIGITAL)
        else:
            return self.publish(item, SUBSCRIBER_TYPES.WIRE if item.get('is_take_item') else None)

    def _publish_package_items(self, package):
        """Publishes all items of a package recursively then publishes the package itself.

        :param package: package to publish
        """
        items = self.package_service.get_residrefs(package)
        subscriber_items = {}
        queued = False
        removed_items = []

        if self.publish_type in ['correct', 'kill']:
            removed_items, added_items = self._get_changed_items(items, package)
            # we raise an error if correction is done on an empty package. Kill is fine.
            if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == 'correct':
                raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!")
            items.extend(added_items)

        if items:
            archive_service = get_resource_service('archive')
            for guid in items:
                package_item = archive_service.find_one(req=None, _id=guid)

                if not package_item:
                    raise SuperdeskApiError.badRequestError(
                        "Package item with id: {} has not been published.".format(guid))

                subscribers, subscriber_codes = self._get_subscribers_for_package_item(package_item)
                digital_item_id = BasePublishService().get_digital_id_for_package_item(package_item)
                self._extend_subscriber_items(subscriber_items, subscribers, package_item,
                                              digital_item_id, subscriber_codes)

            for removed_id in removed_items:
                package_item = archive_service.find_one(req=None, _id=removed_id)
                subscribers, subscriber_codes = self._get_subscribers_for_package_item(package_item)
                digital_item_id = None
                self._extend_subscriber_items(subscriber_items, subscribers, package_item,
                                              digital_item_id, subscriber_codes)

            queued = self.publish_package(package, target_subscribers=subscriber_items)

        return queued

    def _get_changed_items(self, existing_items, package):
        """Returns the added and removed items from existing_items.

        :param existing_items: Existing list
        :param package: Changes
        :return: list of removed items and list of added items
        """
        published_service = get_resource_service('published')
        req = ParsedRequest()
        query = {'query': {'filtered': {'filter': {'and': [{'term': {QUEUE_STATE: PUBLISH_STATE.QUEUED}},
                                                           {'term': {'item_id': package['item_id']}}]}}},
                 'sort': [{'publish_sequence_no': 'desc'}]}
        req.args = {'source': json.dumps(query)}
        req.max_results = 1000
        previously_published_packages = published_service.get(req=req, lookup=None)
        previously_published_package = previously_published_packages[0]

        if 'groups' in previously_published_package:
            old_items = self.package_service.get_residrefs(previously_published_package)
            added_items = list(set(existing_items) - set(old_items))
            removed_items = list(set(old_items) - set(existing_items))
            return removed_items, added_items
        else:
            return [], []

    def enqueue_item(self, item):
        """Creates the corresponding entries in the publish queue for the given item.

        :return bool: True if the item is queued, else False.
        """
        try:
            return self._enqueue_item(item)
        except SuperdeskApiError as e:
            raise e
        except KeyError as e:
            raise SuperdeskApiError.badRequestError(
                message="Key is missing on article to be published: {}".format(str(e)))
        except Exception as e:
            logger.exception("Something bad happened while publishing {}".format(item.get(config.ID_FIELD)))
            raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e)))

    def get_subscribers(self, doc, target_media_type):
        """Get the subscribers for doc based on target_media_type.

        Override this method in ArchivePublishService, ArchiveCorrectService and ArchiveKillService.

        :param doc: Document to publish/correct/kill
        :param target_media_type: dictates if the doc being queued is a Takes Package or an
            Individual Article. Valid values are - Wire, Digital. If Digital, then the doc being
            queued is a Takes Package; if Wire, then the doc being queued is an Individual Article.
        :return: (list, list) List of filtered subscribers,
            List of subscribers that have not received the item previously (empty list in this case).
        """
        raise NotImplementedError()

    def publish(self, doc, target_media_type=None):
        """Queue the content for publishing.

        1. Get the subscribers.
        2. Update the headline of wire stories with the sequence.
        3. Queue the content for subscribers.
        4. Queue the content for previously published subscribers if any.
        5. Sends a notification if no formatter is found for any of the formats configured in the
           Subscriber.
        6. If not queued and no formatters are found then raise an exception.

        :param dict doc: document to publish
        :param str target_media_type: dictates if the doc being queued is a Takes Package or an
            Individual Article. Valid values are - Wire, Digital. If Digital, then the doc being
            queued is a Takes Package; if Wire, then the doc being queued is an Individual Article.
        :return bool: if content is queued then True else False
        :raises PublishQueueError.item_not_queued_error: If nothing is queued.
        """
        # Step 1
        subscribers, subscribers_yet_to_receive, subscriber_codes = self.get_subscribers(doc, target_media_type)

        # Step 2
        if target_media_type == SUBSCRIBER_TYPES.WIRE:
            self._update_headline_sequence(doc)

        # Step 3
        no_formatters, queued = self.queue_transmission(deepcopy(doc), subscribers, subscriber_codes)

        # Step 4
        if subscribers_yet_to_receive:
            formatters_not_found, queued_new_subscribers = \
                self.queue_transmission(deepcopy(doc), subscribers_yet_to_receive, subscriber_codes)
            no_formatters.extend(formatters_not_found)
            queued = queued or queued_new_subscribers

        # Step 5
        self._push_formatter_notification(doc, no_formatters)

        # Step 6
        if not target_media_type and not queued:
            logger.exception('Nothing is saved to publish queue for story: {} for action: {}'
                             .format(doc[config.ID_FIELD], self.publish_type))

        return queued

    def _push_formatter_notification(self, doc, no_formatters=[]):
        if len(no_formatters) > 0:
            user = get_user()
            push_notification('item:publish:wrong:format',
                              item=str(doc[config.ID_FIELD]), unique_name=doc['unique_name'],
                              desk=str(doc.get('task', {}).get('desk', '')),
                              user=str(user.get(config.ID_FIELD, '')),
                              formats=no_formatters)

    def _get_subscriber_codes(self, subscribers):
        subscriber_codes = {}
        all_products = list(get_resource_service('products').get(req=None, lookup=None))

        for subscriber in subscribers:
            codes = self._get_codes(subscriber)
            products = [p for p in all_products if p[config.ID_FIELD] in subscriber.get('products', [])]

            for product in products:
                codes.extend(self._get_codes(product))

            subscriber_codes[subscriber[config.ID_FIELD]] = list(set(codes))

        return subscriber_codes

    def resend(self, doc, subscribers):
        subscriber_codes = self._get_subscriber_codes(subscribers)
        wire_subscribers = list(self.non_digital(subscribers))
        digital_subscribers = list(self.digital(subscribers))

        if len(wire_subscribers) > 0:
            doc['item_id'] = doc[config.ID_FIELD]
            self._resend_to_subscribers(doc, wire_subscribers, subscriber_codes)

        if len(digital_subscribers) > 0:
            package = self.takes_package_service.get_take_package(doc)
            package['item_id'] = package[config.ID_FIELD]
            self._resend_to_subscribers(package, digital_subscribers, subscriber_codes)

    def _resend_to_subscribers(self, doc, subscribers, subscriber_codes):
        formatter_messages, queued = self.queue_transmission(doc, subscribers, subscriber_codes)
        self._push_formatter_notification(doc, formatter_messages)
        if not queued:
            logger.exception('Nothing is saved to publish queue for story: {} for action: {}'
                             .format(doc[config.ID_FIELD], 'resend'))

    def publish_package(self, package, target_subscribers):
        """Publishes a given non-take package to the given subscribers.

        For each subscriber, updates the package definition with the wanted_items for that
        subscriber and removes the unwanted_items that are not supposed to go to that subscriber.
        Text stories are replaced by their digital versions.

        :param package: Package to be published
        :param target_subscribers: List of subscriber and items-per-subscriber
        """
        all_items = self.package_service.get_residrefs(package)
        no_formatters, queued = [], False
        for items in target_subscribers.values():
            updated = deepcopy(package)
            subscriber = items['subscriber']
            codes = items['codes']
            wanted_items = [item for item in items['items'] if items['items'].get(item, None)]
            unwanted_items = [item for item in all_items if item not in wanted_items]
            for i in unwanted_items:
                still_items_left = self.package_service.remove_ref_from_inmem_package(updated, i)
                if not still_items_left and self.publish_type != 'correct':
                    # if nothing is left in the package to be published and we are
                    # not correcting, then don't send the package
                    return
            for key in wanted_items:
                self.package_service.replace_ref_in_package(updated, key, items['items'][key])

            formatters, temp_queued = self.queue_transmission(updated, [subscriber],
                                                              {subscriber[config.ID_FIELD]: codes})

            no_formatters.extend(formatters)
            if temp_queued:
                queued = temp_queued

        return queued

    def queue_transmission(self, doc, subscribers, subscriber_codes={}):
        """Formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But we can't have the
        formatted item generated once based on the format_types configured across all the
        subscribers, as the formatted item must have a published sequence number generated per
        Subscriber.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dicts.
        :return: (list, bool) tuple of a list of missing formatters and a boolean flag;
            True if queued else False
        """
        try:
            queued = False
            no_formatters = []
            for subscriber in subscribers:
                try:
                    if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                            subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                        # wire subscribers can get only text and preformatted stories
                        continue

                    for destination in subscriber['destinations']:
                        embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                            PACKAGE_TYPE not in doc and destination['config'].get('packaged', False)
                        if embed_package_items:
                            doc = self._embed_package_items(doc)

                        # Step 2(a)
                        formatter = get_formatter(destination['format'], doc)

                        if not formatter:  # if no formatter is found then record it
                            no_formatters.append(destination['format'])
                            continue

                        formatted_docs = formatter.format(doc, subscriber,
                                                          subscriber_codes.get(subscriber[config.ID_FIELD]))

                        for idx, publish_data in enumerate(formatted_docs):
                            if not isinstance(publish_data, dict):
                                pub_seq_num, formatted_doc = publish_data
                                formatted_docs[idx] = {'published_seq_num': pub_seq_num,
                                                       'formatted_item': formatted_doc}
                            else:
                                assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data, \
                                    "missing keys in publish_data"

                        for publish_queue_item in formatted_docs:
                            publish_queue_item['item_id'] = doc['item_id']
                            publish_queue_item['item_version'] = doc[config.VERSION]
                            publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                            publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                            publish_queue_item['destination'] = destination
                            # publish_schedule just indicates that the queue item was created via a scheduled item
                            publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                            publish_queue_item['unique_name'] = doc.get('unique_name', None)
                            publish_queue_item['content_type'] = doc.get('type', None)
                            publish_queue_item['headline'] = doc.get('headline', None)
                            publish_queue_item['publishing_action'] = self.published_state
                            publish_queue_item['ingest_provider'] = \
                                ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                            if doc.get(PUBLISHED_IN_PACKAGE):
                                publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                            try:
                                encoded_item = publish_queue_item.pop('encoded_item')
                            except KeyError:
                                pass
                            else:
                                binary = io.BytesIO(encoded_item)
                                publish_queue_item['encoded_item_id'] = app.storage.put(binary)

                            publish_queue_item.pop(ITEM_STATE, None)
                            get_resource_service('publish_queue').post([publish_queue_item])
                            queued = True
                except Exception:
                    logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                     .format(doc.get(config.ID_FIELD), doc.get('headline'),
                                             subscriber.get('name')))

            return no_formatters, queued
        except Exception:
            raise

    def _embed_package_items(self, package):
        """Embeds all package items in the package document."""
        for group in package.get(GROUPS, []):
            if group[GROUP_ID] == ROOT_GROUP:
                continue
            for ref in group[REFS]:
                if RESIDREF not in ref:
                    continue
                package_item = get_resource_service('published').find_one(
                    req=None, item_id=ref[RESIDREF], _current_version=ref[config.VERSION])
                if not package_item:
                    msg = 'Can not find package %s published item %s' % (package['item_id'], ref['residRef'])
                    raise SuperdeskPublishError(500, msg)
                package_item[config.ID_FIELD] = package_item['item_id']
                ref['package_item'] = package_item
        return package

    def _update_headline_sequence(self, doc):
        """Updates the headline of the text story if there's any sequence value in it."""
        if doc.get(SEQUENCE):
            doc['headline'] = '{}={}'.format(doc['headline'], doc.get(SEQUENCE))

    def _get_subscribers_for_package_item(self, package_item):
        """Finds the list of subscribers for a given item in a package.

        :param package_item: item in a package
        :return list: List of subscribers
        :return string: Digital item id if there's one, otherwise None
        """
        if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]:
            query = {'$and': [{'item_id': package_item[config.ID_FIELD]},
                              {'publishing_action': package_item[ITEM_STATE]}]}
        else:
            package_item_takes_package = self.takes_package_service.get_take_package(package_item)
            if not package_item_takes_package:
                # this item has not been published to digital subscribers,
                # so the list of subscribers is empty
                return [], {}

            query = {'$and': [{'item_id': package_item_takes_package[config.ID_FIELD]},
                              {'publishing_action': package_item_takes_package[ITEM_STATE]}]}

        return self._get_subscribers_for_previously_sent_items(query)

    def _get_subscribers_for_previously_sent_items(self, lookup):
        """Returns the list of subscribers that have previously received the item.

        :param dict lookup: elastic query to filter the publish queue
        :return: list of subscribers and list of product codes per subscriber
        """
        req = ParsedRequest()
        subscribers = []
        subscriber_codes = {}
        queued_items = list(get_resource_service('publish_queue').get(req=req, lookup=lookup))
        if len(queued_items) > 0:
            subscriber_ids = {queued_item['subscriber_id'] for queued_item in queued_items}
            subscriber_codes = {q['subscriber_id']: q.get('codes', []) for q in queued_items}
            query = {'$and': [{config.ID_FIELD: {'$in': list(subscriber_ids)}}]}
            subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))
        return subscribers, subscriber_codes

    def filter_subscribers(self, doc, subscribers, target_media_type):
        """Filter subscribers to whom the current document is going to be delivered.
:param doc: Document to publish/kill/correct :param subscribers: List of Subscribers that might potentially get this document :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article. Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire then the doc being queues is an Individual Article. :return: List of of filtered subscribers and list of product codes per subscriber. """ filtered_subscribers = [] subscriber_codes = {} req = ParsedRequest() req.args = {'is_global': True} filter_service = get_resource_service('content_filters') existing_products = { p[config.ID_FIELD]: p for p in list( get_resource_service('products').get(req=req, lookup=None)) } global_filters = list(filter_service.get(req=req, lookup=None)) for subscriber in subscribers: if target_media_type and subscriber.get( 'subscriber_type', '') != SUBSCRIBER_TYPES.ALL: can_send_takes_packages = subscriber[ 'subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL if target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages or \ target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages: continue conforms, skip_filters = self.conforms_subscriber_targets( subscriber, doc) if not conforms: continue if not self.conforms_global_filter(subscriber, global_filters, doc): continue product_codes = self._get_codes(subscriber) subscriber_added = False for product_id in subscriber.get('products', []): # check if the product filter conforms with the story product = existing_products.get(product_id) if not product: continue if not self.conforms_product_targets(product, doc): continue if self.conforms_content_filter(product, doc): # gather the codes of products product_codes.extend(self._get_codes(product)) if not subscriber_added: filtered_subscribers.append(subscriber) subscriber_added = True if skip_filters and not subscriber_added: filtered_subscribers.append(subscriber) subscriber_added = True # unify the list of codes by removing duplicates if subscriber_added: subscriber_codes[subscriber[config.ID_FIELD]] = list( set(product_codes)) return filtered_subscribers, subscriber_codes def conforms_product_targets(self, product, article): """ Checks if the given article has any target information and if it does it checks if the product satisfies any of the target information :param product: Product to test :param article: article :return: bool: True if the article conforms the targets for the given product """ geo_restrictions = product.get('geo_restrictions') # If not targeted at all then Return true if not BasePublishService().is_targeted(article, 'target_regions'): return geo_restrictions is None if geo_restrictions: for region in article.get('target_regions', []): if region['qcode'] == geo_restrictions and region['allow']: return True if region['qcode'] != geo_restrictions and not region['allow']: return True return False def conforms_subscriber_targets(self, subscriber, article): """ Checks if the given article has any target information and if it does it checks if the subscriber satisfies any of the target information :param subscriber: Subscriber to test :param article: article :return: bool: True/False if the article conforms the targets bool: True if the given subscriber is specifically targeted, False otherwise """ # If not targeted at all then Return true if not BasePublishService().is_targeted(article, 'target_subscribers') and \ not BasePublishService().is_targeted(article, 'target_types'): return True, False 
subscriber_type = subscriber.get('subscriber_type')

for t in article.get('target_subscribers', []):
    if str(t.get('_id')) == str(subscriber['_id']):
        return True, True

if subscriber_type:
    for t in article.get('target_types', []):
        if t['qcode'] == subscriber_type and t['allow']:
            return True, False
        if t['qcode'] != subscriber_type and not t['allow']:
            return True, False

# If there's a region target then continue with the subscriber to check products
if BasePublishService().is_targeted(article, 'target_regions'):
    return True, False

# Nothing matches so this subscriber doesn't conform
return False, False

def conforms_content_filter(self, product, doc):
    """
    Checks if the document matches the product's content filter.

    :param product: Product where the filter is used
    :param doc: Document to test the filter against
    :return: True if there's no filter
             True if matches and permitting
             False if matches and blocking
             False if doesn't match and permitting
             True if doesn't match and blocking
    """
    content_filter = product.get('content_filter')

    if content_filter is None or 'filter_id' not in content_filter or content_filter['filter_id'] is None:
        return True

    service = get_resource_service('content_filters')
    filter_doc = service.find_one(req=None, _id=content_filter['filter_id'])
    does_match = service.does_match(filter_doc, doc)

    if does_match:
        return content_filter['filter_type'] == 'permitting'
    else:
        return content_filter['filter_type'] == 'blocking'

def conforms_global_filter(self, subscriber, global_filters, doc):
    """
    Checks if the subscriber has an override rule for each global filter and, if not,
    checks whether the document matches that global filter.

    :param subscriber: Subscriber to check for global filter overrides
    :param global_filters: List of all global filters
    :param doc: Document to test the global filters against
    :return: False if an applicable (non-overridden) global filter matches the document,
             True otherwise
    """
    service = get_resource_service('content_filters')
    gfs = subscriber.get('global_filters', {})
    for global_filter in global_filters:
        if gfs.get(str(global_filter[config.ID_FIELD]), True):
            # global filter applies to this subscriber
            if service.does_match(global_filter, doc):
                # all global filters behave like blocking filters
                return False
    return True

def _extend_subscriber_items(self, subscriber_items, subscribers, item, digital_item_id, subscriber_codes):
    """
    Extends subscriber_items with the given list of subscribers for the item.

    :param subscriber_items: existing mapping of subscriber id to subscriber, items and codes
    :param subscribers: new subscribers that the item has been published to - to be added
    :param item: item that has been published
    :param digital_item_id: digital item id
    :param subscriber_codes: mapping of subscriber id to product codes
    """
    item_id = item[config.ID_FIELD]
    for subscriber in subscribers:
        sid = subscriber[config.ID_FIELD]
        item_list = subscriber_items.get(sid, {}).get('items', {})
        item_list[item_id] = digital_item_id
        subscriber_items[sid] = {
            'subscriber': subscriber,
            'items': item_list,
            'codes': subscriber_codes.get(sid, []),
        }

def _get_codes(self, item):
    if item.get('codes'):
        return [c.strip() for c in item.get('codes').split(',') if c]
    return []
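# --- Illustrative sketch (not part of the original service) -----------------
# The geo-targeting rules in conforms_product_targets() above are easy to get
# backwards, so here is a tiny self-contained rendering of the same decision
# table. The qcodes and product dicts below are hypothetical sample data.

def _demo_conforms_product_targets():
    def conforms(product, article):
        geo = product.get('geo_restrictions')
        regions = article.get('target_regions', [])
        if not regions:
            # article is not targeted at all: only unrestricted products conform
            return geo is None
        if geo:
            for region in regions:
                if region['qcode'] == geo and region['allow']:
                    return True   # explicitly allowed for this product's region
                if region['qcode'] != geo and not region['allow']:
                    return True   # a different region is excluded, so this one passes
        return False

    nsw_product = {'geo_restrictions': 'NSW'}
    assert conforms(nsw_product, {'target_regions': [{'qcode': 'NSW', 'allow': True}]})
    assert conforms(nsw_product, {'target_regions': [{'qcode': 'VIC', 'allow': False}]})
    assert not conforms(nsw_product, {'target_regions': [{'qcode': 'VIC', 'allow': True}]})
    assert not conforms(nsw_product, {})  # untargeted article vs geo-restricted product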
def _validate_take(self, original):
    takes_service = TakesPackageService()
    if not takes_service.is_last_takes_package_item(original):
        raise SuperdeskApiError.badRequestError(
            message="Only the last take of the package can be spiked.")
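# Minimal sketch (hypothetical helper, not the TakesPackageService API) of the
# "only the last take can be spiked" rule enforced above: spiking an earlier
# take would punch a hole in the take sequence that the package and any
# already-published takes still reference.

def _is_last_take(take_id, ordered_take_ids):
    """Return True when take_id is the final take of its package."""
    return bool(ordered_take_ids) and ordered_take_ids[-1] == take_id

# _is_last_take('take-3', ['take-1', 'take-2', 'take-3'])  -> True  (can spike)
# _is_last_take('take-2', ['take-1', 'take-2', 'take-3'])  -> False (rejected)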
def _validate_updates(self, original, updates, user):
    """
    Validates updates to the article for the conditions below; if any of them fails
    then an exception is raised:

    1. Is the article locked by a user other than the user requesting the update?
    2. Is the state of the article Killed?
    3. Is the user trying to update a package with Public Service Announcements?
    4. Is the user authorized to update the unique name of the article?
    5. Is the user trying to update the genre of a broadcast article?
    6. Is the article being scheduled while it is in a package?
    7. Is the article being scheduled with an invalid schedule timestamp?
    8. Does the article have valid crops if it is a picture?
    9. Is the article a valid package if it is a package?
    10. Does the article have a valid Embargo?
    11. Make sure that there are no duplicate anpa_category codes in the article.
    12. Make sure that there are no duplicate subjects in the update.

    :raises:
        SuperdeskApiError.forbiddenError()
            - if the state of the article is killed, the user is not authorized to
              modify the unique name, or the article is locked by another user
        SuperdeskApiError.badRequestError()
            - if Public Service Announcements are being added to a package, the genre
              of a broadcast is being updated, the schedule is invalid, or the updates
              contain duplicate anpa_category or subject codes
    """
    lock_user = original.get('lock_user', None)
    force_unlock = updates.get('force_unlock', False)
    str_user_id = str(user.get(config.ID_FIELD)) if user else None

    if lock_user and str(lock_user) != str_user_id and not force_unlock:
        raise SuperdeskApiError.forbiddenError('The item was locked by another user')

    if original.get(ITEM_STATE) == CONTENT_STATE.KILLED:
        raise SuperdeskApiError.forbiddenError("Item isn't in a valid state to be updated.")

    if updates.get('body_footer') and is_normal_package(original):
        raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements")

    if 'unique_name' in updates and not is_admin(user) \
            and (user['active_privileges'].get('metadata_uniquename', 0) == 0):
        raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name")

    # updating the genre of broadcast content is not allowed
    if original.get('broadcast') and updates.get('genre') and \
            any(genre.get('value', '').lower() != BROADCAST_GENRE.lower() for genre in updates.get('genre')):
        raise SuperdeskApiError.badRequestError('Cannot change the genre for broadcast content.')

    if updates.get('publish_schedule') and original[ITEM_STATE] != CONTENT_STATE.SCHEDULED \
            and datetime.datetime.fromtimestamp(0).date() != updates['publish_schedule'].date():
        if is_item_in_package(original):
            raise SuperdeskApiError.badRequestError(
                'This item is in a package and it needs to be removed before the item can be scheduled!')

        package = TakesPackageService().get_take_package(original) or {}
        validate_schedule(updates['publish_schedule'], package.get(SEQUENCE, 1))

    if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE:
        CropService().validate_multiple_crops(updates, original)
    elif original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
        self.packageService.on_update(updates, original)

    # Do the validation after the Circular Reference check passes in the Package Service
    updated = original.copy()
    updated.update(updates)
    self.validate_embargo(updated)

    # Ensure that there are no duplicate categories in the update
    category_qcodes = [q['qcode'] for q in updates.get('anpa_category', []) or []]
    if category_qcodes and len(category_qcodes) != len(set(category_qcodes)):
        raise SuperdeskApiError.badRequestError("Duplicate category codes are not allowed")

    # Ensure that there are no duplicate subjects in the update
    subject_qcodes = [q['qcode'] for q in updates.get('subject', []) or []]
    if subject_qcodes and len(subject_qcodes) != len(set(subject_qcodes)):
        raise SuperdeskApiError.badRequestError("Duplicate subjects are not allowed")
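# Self-contained illustration of the duplicate-qcode guard at the end of
# _validate_updates() above; the category values are hypothetical.

def _has_duplicate_qcodes(terms):
    """True when the same qcode appears more than once in a CV term list."""
    qcodes = [t['qcode'] for t in terms or []]
    return len(qcodes) != len(set(qcodes))

# _has_duplicate_qcodes([{'qcode': 'i'}, {'qcode': 'i'}])  -> True  (rejected)
# _has_duplicate_qcodes([{'qcode': 'i'}, {'qcode': 'e'}])  -> False (accepted)
# _has_duplicate_qcodes(None)                              -> False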
class BasePublishService(BaseService): """ Base service class for "publish" endpoint """ publish_type = 'publish' published_state = 'published' non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE) digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL})) takes_package_service = TakesPackageService() package_service = PackageService() def raise_if_not_marked_for_publication(self, original): if original.get('flags', {}).get('marked_for_not_publication', False): raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication') def raise_if_invalid_state_transition(self, original): if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]): error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \ "Can't {} as either package state or one of the items state is {}" raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE])) def on_update(self, updates, original): self.raise_if_not_marked_for_publication(original) self.raise_if_invalid_state_transition(original) updated = original.copy() updated.update(updates) takes_package = self.takes_package_service.get_take_package(original) if self.publish_type == 'publish': # validate if take can be published if takes_package and not self.takes_package_service.can_publish_take( takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))): raise PublishQueueError.previous_take_not_published_error( Exception("Previous takes are not published.")) validate_schedule(updated.get('publish_schedule'), takes_package.get(SEQUENCE, 1) if takes_package else 1) if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO): get_resource_service(ARCHIVE).validate_embargo(updated) if self.publish_type in ['correct', 'kill']: if updates.get(EMBARGO): raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing") if updates.get('dateline'): raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing") validate_item = {'act': self.publish_type, 'type': original['type'], 'validate': updated} validation_errors = get_resource_service('validate').post([validate_item]) if validation_errors[0]: raise ValidationError(validation_errors) # validate the package if it is one package_validation_errors = [] self._validate_package_contents(original, takes_package, package_validation_errors) if len(package_validation_errors) > 0: raise ValidationError(package_validation_errors) self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow())) updates[ITEM_OPERATION] = ITEM_PUBLISH convert_task_attributes_to_objectId(updates) def on_updated(self, updates, original): self.update_published_collection(published_item_id=original[config.ID_FIELD]) original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD]) updates.update(original) user = get_user() if updates[ITEM_OPERATION] != ITEM_KILL and \ original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]: get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original) get_resource_service('archive_broadcast').reset_broadcast_status(updates, original) push_notification('item:updated', item=str(original[config.ID_FIELD]), user=str(user.get(config.ID_FIELD))) self._import_into_legal_archive(updates) def update(self, id, updates, original): """ Handles 
workflow of each Publish, Corrected and Killed. """ try: user = get_user() last_updated = updates.get(config.LAST_UPDATED, utcnow()) auto_publish = updates.pop('auto_publish', False) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self._publish_package_items(original, updates) queued_digital = False package = None if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE: # if target_for is set the we don't to digital client. if not (updates.get('targeted_for', original.get('targeted_for')) or is_genre(original, BROADCAST_GENRE)): # check if item is in a digital package package = self.takes_package_service.get_take_package(original) if package: queued_digital = self._publish_takes_package(package, updates, original, last_updated) else: ''' If type of the item is text or preformatted then item need to be sent to digital subscribers. So, package the item as a take. ''' updated = copy(original) updated.update(updates) if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} and \ self.sending_to_digital_subscribers(updated): # create a takes package package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None) updates[LINKED_IN_PACKAGES] = updated[LINKED_IN_PACKAGES] package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id) queued_digital = self._publish_takes_package(package, updates, original, last_updated) # queue only text items media_type = None updated = deepcopy(original) updated.update(updates) if package: media_type = SUBSCRIBER_TYPES.WIRE queued_wire = self.publish(doc=original, updates=updates, target_media_type=media_type) queued = queued_digital or queued_wire if not queued: logger.exception('Nothing is saved to publish queue for story: {} for action: {}'. format(original[config.ID_FIELD], self.publish_type)) self._update_archive(original=original, updates=updates, should_insert_into_versions=auto_publish) push_notification('item:publish', item=str(id), unique_name=original['unique_name'], desk=str(original.get('task', {}).get('desk', '')), user=str(user.get(config.ID_FIELD, ''))) except SuperdeskApiError as e: raise e except KeyError as e: raise SuperdeskApiError.badRequestError( message="Key is missing on article to be published: {}".format(str(e))) except Exception as e: logger.exception("Something bad happened while publishing %s".format(id)) raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(e))) def _publish_takes_package(self, package, updates, original, last_updated): """ Process the takes to form digital master file content and publish. :param dict package: Takes package :param dict updates: updates for the take :param dict original: original takes :param datetime.datetime last_updated: datetime for the updates :return bool: boolean flag indicating takes package is queued or not """ package_updates = self.process_takes(updates_of_take_to_be_published=updates, original_of_take_to_be_published=original, package=package) self._set_updates(package, package_updates, last_updated) package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH)) self._update_archive(package, package_updates) ''' When embargo is lapsed and the article should go to Digital Subscribers the BasePublishService creates a Takes Package whose state is draft. In this case, we can't initiate post-publish actions on the Takes Package as the package hasn't been published. And post-publish service's get_subscribers() will return empty list. 
Also, logically without publishing a package post-publish actions on the item doesn't make sense. That's the reason checking the Takes Package state and invoking the appropriate Publish Service. ''' if package[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]: package.update(package_updates) queued_digital = self.publish(doc=package, updates=None, target_media_type=SUBSCRIBER_TYPES.DIGITAL) else: package.update(package_updates) queued_digital = get_resource_service('archive_publish').publish(doc=package, updates=None, target_media_type=SUBSCRIBER_TYPES.DIGITAL) self.update_published_collection(published_item_id=package[config.ID_FIELD]) self._import_into_legal_archive(package) return queued_digital def _import_into_legal_archive(self, doc): """ Import into legal archive async :param {dict} doc: document to be imported """ if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED: kwargs = { 'doc': doc } import_into_legal_archive.apply_async(kwargs=kwargs) def _publish_package_items(self, package, updates): """ Publishes all items of a package recursively then publishes the package itself :param package: package to publish :param updates: payload """ items = self.package_service.get_residrefs(package) if len(items) == 0 and self.publish_type == ITEM_PUBLISH: raise SuperdeskApiError.badRequestError("Empty package cannot be published!") removed_items = [] if self.publish_type in [ITEM_CORRECT, ITEM_KILL]: removed_items, added_items = self._get_changed_items(items, updates) # we raise error if correction is done on a empty package. Kill is fine. if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT: raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!") items.extend(added_items) subscriber_items = {} if items: archive_publish = get_resource_service('archive_publish') for guid in items: package_item = super().find_one(req=None, _id=guid) if not package_item: raise SuperdeskApiError.badRequestError( "Package item with id: {} does not exist.".format(guid)) if package_item[ITEM_STATE] not in PUBLISH_STATES: # if the item is not published then publish it if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: # if the item is a package do recursion to publish sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates} sub_updates['groups'] = list(package_item['groups']) self._publish_package_items(package_item, sub_updates) self._update_archive(original=package_item, updates=sub_updates, should_insert_into_versions=False) self.update_published_collection(published_item_id=package_item[config.ID_FIELD]) else: # publish the item archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item) insert_into_versions(id_=guid) elif guid in removed_items: # remove the package information from the package item. 
linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES) if linked.get(PACKAGE) != package.get(config.ID_FIELD)] super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item) package_item = super().find_one(req=None, _id=guid) subscribers = self._get_subscribers_for_package_item(package_item) self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD], config.VERSION, package_item[config.VERSION]) if package_item[config.ID_FIELD] in removed_items: digital_item_id = None else: digital_item_id = self._get_digital_id_for_package_item(package_item) self._extend_subscriber_items(subscriber_items, subscribers, package_item, digital_item_id) self.publish_package(package, updates, target_subscribers=subscriber_items) def _extend_subscriber_items(self, subscriber_items, subscribers, item, digital_item_id): """ Extends the subscriber_items with the given list of subscribers for the item :param subscriber_items: The existing list of subscribers :param subscribers: New subscribers that item has been published to - to be added :param item: item that has been published :param digital_item_id: digital_item_id """ item_id = item[config.ID_FIELD] for subscriber in subscribers: sid = subscriber[config.ID_FIELD] item_list = subscriber_items.get(sid, {}).get('items', {}) item_list[item_id] = digital_item_id subscriber_items[sid] = {'subscriber': subscriber, 'items': item_list} def _get_changed_items(self, existing_items, updates): """ Returns the added and removed items from existing_items :param existing_items: Existing list :param updates: Changes :return: list of removed items and list of added items """ if 'groups' in updates: new_items = self.package_service.get_residrefs(updates) removed_items = list(set(existing_items) - set(new_items)) added_items = list(set(new_items) - set(existing_items)) return removed_items, added_items else: return [], [] def _get_digital_id_for_package_item(self, package_item): """ Finds the digital item id for a given item in a package :param package_item: item in a package :return string: Digital item id if there's one otherwise id of package_item """ if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]: return package_item[config.ID_FIELD] else: package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item) if not package_item_takes_package_id: return package_item[config.ID_FIELD] return package_item_takes_package_id def _get_subscribers_for_package_item(self, package_item): """ Finds the list of subscribers for a given item in a package :param package_item: item in a package :return list: List of subscribers :return string: Digital item id if there's one otherwise None """ if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]: query = {'$and': [{'item_id': package_item[config.ID_FIELD]}, {'publishing_action': package_item[ITEM_STATE]}]} else: package_item_takes_package = self.takes_package_service.get_take_package(package_item) if not package_item_takes_package: # this item has not been published to digital subscribers so # the list of subscribers are empty return [] query = {'$and': [{'item_id': package_item_takes_package[config.ID_FIELD]}, {'publishing_action': package_item_takes_package[ITEM_STATE]}]} return self._get_subscribers_for_previously_sent_items(query) def _set_updates(self, original, updates, last_updated): """ Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document. 
If item is being published and embargo is available then append Editorial Note with 'Embargoed'. :param dict original: original document :param dict updates: updates related to the original document :param datetime last_updated: datetime of the updates. """ self.set_state(original, updates) updates.setdefault(config.LAST_UPDATED, last_updated) if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]): resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original) if updates.get(EMBARGO, original.get(EMBARGO)) \ and updates.get('ednote', original.get('ednote', '')).find('Embargo') == -1: updates['ednote'] = '{} {}'.format(original.get('ednote', ''), 'Embargoed.').strip() def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True): """ Updates the articles into archive collection and inserts the latest into archive_versions. Also clears autosaved versions if any. :param: versioned_doc: doc which can be inserted into archive_versions :param: should_insert_into_versions if True inserts the latest document into versions collection """ self.backend.update(self.datasource, original[config.ID_FIELD], updates, original) if should_insert_into_versions: if versioned_doc is None: insert_into_versions(id_=original[config.ID_FIELD]) else: insert_into_versions(doc=versioned_doc) get_component(ItemAutosave).clear(original[config.ID_FIELD]) def set_state(self, original, updates): """ Set the state of the document based on the action (publish, correction, kill) :param dict original: original document :param dict updates: updates related to document """ updates['publish_schedule'] = None updates[ITEM_STATE] = self.published_state def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None): """ Primary rule for publishing a Take in Takes Package is: all previous takes must be published before a take can be published. Also, generates body_html of the takes package and make sure the metadata for the package is the same as the metadata of the take to be published. 
:param dict updates_of_take_to_be_published: updates for the take to be published :param dict package: Takes package to publish :param dict original_of_take_to_be_published: original of the take to be published :return: Takes Package Updates """ takes = self.takes_package_service.get_published_takes(package) body_html = updates_of_take_to_be_published.get('body_html', original_of_take_to_be_published.get('body_html', '')) package_updates = {} groups = package.get(GROUPS, []) if groups: take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs')] sequence_num_of_take_to_be_published = 0 take_article_id = updates_of_take_to_be_published.get( config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD]) for r in take_refs: if r[GUID_FIELD] == take_article_id: sequence_num_of_take_to_be_published = r[SEQUENCE] break if takes and self.published_state != 'killed': body_html_list = [take.get('body_html', '') for take in takes] if self.published_state == 'published': body_html_list.append(body_html) else: body_html_list[sequence_num_of_take_to_be_published - 1] = body_html package_updates['body_html'] = '<br>'.join(body_html_list) else: package_updates['body_html'] = body_html metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy() metadata_tobe_copied.extend(['publish_schedule', 'byline']) updated_take = original_of_take_to_be_published.copy() updated_take.update(updates_of_take_to_be_published) metadata_from = updated_take if self.published_state == 'corrected' and len(takes) > 1: # get the last take metadata only if there are more than one takes metadata_from = takes[-1] for metadata in metadata_tobe_copied: if metadata in metadata_from: package_updates[metadata] = metadata_from.get(metadata) package_updates[GROUPS] = groups self.package_service.update_field_in_package(package_updates, original_of_take_to_be_published[config.ID_FIELD], config.VERSION, updates_of_take_to_be_published[config.VERSION]) return package_updates def publish_package(self, package, updates, target_subscribers): """ Publishes a given non-take package to given subscribers. For each subscriber updates the package definition with the wanted_items for that subscriber and removes unwanted_items that doesn't supposed to go that subscriber. Text stories are replaced by the digital versions. 
:param package: Package to be published :param updates: Updates to the package :param target_subscribers: List of subscriber and items-per-subscriber """ self._process_publish_updates(package, updates) all_items = self.package_service.get_residrefs(package) for items in target_subscribers.values(): updated = deepcopy(package) updates_copy = deepcopy(updates) updated.update(updates_copy) subscriber = items['subscriber'] wanted_items = [item for item in items['items'] if items['items'].get(item, None)] unwanted_items = [item for item in all_items if item not in wanted_items] for i in unwanted_items: still_items_left = self.package_service.remove_ref_from_inmem_package(updated, i) if not still_items_left and self.publish_type != 'correct': # if nothing left in the package to be published and # if not correcting then don't send the package return for key in wanted_items: self.package_service.replace_ref_in_package(updated, key, items['items'][key]) self.queue_transmission(updated, [subscriber]) def _process_publish_updates(self, doc, updates): """ Common updates for published items """ desk = None if doc.get('task', {}).get('desk'): desk = get_resource_service('desks').find_one(req=None, _id=doc['task']['desk']) if not doc.get('ingest_provider'): updates['source'] = desk['source'] if desk and desk.get('source', '') \ else DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE def publish(self, doc, updates, target_media_type=None): """ Queue the content for publishing. 1. Sets the Metadata Properties - source and pubstatus 2. Get the subscribers. 3. Update the headline of wire stories with the sequence 4. Queue the content for subscribers 5. Queue the content for previously published subscribers if any. 6. Sends notification if no formatter has found for any of the formats configured in Subscriber. 7. If not queued and not formatters then raise exception. :param dict doc: document to publish :param dict updates: updates for the document :param str target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article. Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire then the doc being queues is an Individual Article. :param dict target_subscribers: list of subscribers that document needs to get sent :return bool: if content is queued then True else False :raises PublishQueueError.item_not_queued_error: If the nothing is queued. 
""" queued = True no_formatters = [] updated = doc.copy() # Step 1 if updates: self._process_publish_updates(doc, updates) updated.update(updates) # Step 2 subscribers, subscribers_yet_to_receive = self.get_subscribers(doc, target_media_type) # Step 3 if target_media_type == SUBSCRIBER_TYPES.WIRE: self._update_headline_sequence(updated) # Step 4 no_formatters, queued = self.queue_transmission(updated, subscribers) # Step 5 if subscribers_yet_to_receive: formatters_not_found, queued_new_subscribers = self.queue_transmission(updated, subscribers_yet_to_receive) no_formatters.extend(formatters_not_found) queued = queued or queued_new_subscribers # Step 6 user = get_user() if len(no_formatters) > 0: push_notification('item:publish:wrong:format', item=str(doc[config.ID_FIELD]), unique_name=doc['unique_name'], desk=str(doc.get('task', {}).get('desk', '')), user=str(user.get(config.ID_FIELD, '')), formats=no_formatters) # Step 7 if not target_media_type and not queued: logger.exception('Nothing is saved to publish queue for story: {} for action: {}'. format(doc[config.ID_FIELD], self.publish_type)) return queued def sending_to_digital_subscribers(self, doc): """ Returns False if item has embargo and is in future. Returns True if there is a digital subscriber either in the previously sent or in yet to be sent subscribers :param doc: document :return bool: True if there's at least one """ if doc.get(EMBARGO) and doc.get(EMBARGO) > utcnow(): return False subscribers, subscribers_yet_to_receive = self.get_subscribers(doc, SUBSCRIBER_TYPES.DIGITAL) subscribers = list(self.digital(subscribers)) subscribers_yet_to_receive = list(self.digital(subscribers_yet_to_receive)) return len(subscribers) > 0 or len(subscribers_yet_to_receive) > 0 def get_subscribers(self, doc, target_media_type): """ Get subscribers for doc based on target_media_type. Override this method in the ArchivePublishService, ArchiveCorrectService and ArchiveKillService :param doc: Document to publish/correct/kill :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article. Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package and if Wire then the doc being queues is an Individual Article. :return: (list, list) List of filtered subscriber, List of subscribers that have not received item previously (empty list in this case). """ raise NotImplementedError() def _get_subscribers_for_previously_sent_items(self, lookup): """ Returns list of subscribers that have previously received the item. :param dict lookup: elastic query to filter the publish queue :return: list of subscribers """ req = ParsedRequest() subscribers = [] queued_items = get_resource_service('publish_queue').get(req=req, lookup=lookup) if queued_items.count(): subscriber_ids = {queued_item['subscriber_id'] for queued_item in queued_items} query = {'$and': [{config.ID_FIELD: {'$in': list(subscriber_ids)}}]} subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query)) return subscribers def filter_subscribers(self, doc, subscribers, target_media_type): """ Filter subscribers to whom the current document is going to be delivered. :param doc: Document to publish/kill/correct :param subscribers: List of Subscribers that might potentially get this document :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article. Valid values are - Wire, Digital. 
If Digital then the doc being queued is a Takes Package and if Wire then the doc being queued is an
    Individual Article.
:return: List of filtered subscribers.
"""
filtered_subscribers = []
req = ParsedRequest()
req.args = {'is_global': True}
service = get_resource_service('content_filters')
global_filters = list(service.get(req=req, lookup=None))

for subscriber in subscribers:
    if target_media_type and subscriber.get('subscriber_type', '') != SUBSCRIBER_TYPES.ALL:
        can_send_takes_packages = subscriber['subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
        if target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages or \
                target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages:
            continue

    if doc.get('targeted_for'):
        found_match = [t for t in doc['targeted_for'] if t['name'] == subscriber.get('subscriber_type', '')]

        if len(found_match) == 0 and subscriber.get('geo_restrictions'):
            found_match = [t for t in doc['targeted_for'] if t['name'] == subscriber['geo_restrictions']]
            if len(found_match) == 0 or found_match[0]['allow'] is False:
                continue
        elif len(found_match) > 0 and found_match[0]['allow'] is False:
            continue

    if not self.conforms_global_filter(subscriber, global_filters, doc):
        continue

    if not self.conforms_content_filter(subscriber, doc):
        continue

    filtered_subscribers.append(subscriber)

return filtered_subscribers

def queue_transmission(self, doc, subscribers):
    """
    Method formats and then queues the article for transmission to the passed subscribers.

    ::Important Note:: Format Type across Subscribers can repeat. But we can't have the formatted
    item generated once based on the format_types configured across all the subscribers, as the
    formatted item must have a published sequence number generated per Subscriber.

    :param dict doc: document to queue for transmission
    :param list subscribers: List of subscriber dict.
    :return: (list, bool) tuple of list of missing formatters and boolean flag.
True if queued else False """ try: queued = False no_formatters = [] for subscriber in subscribers: try: if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \ subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE: # wire subscribers can get only text and preformatted stories continue for destination in subscriber['destinations']: # Step 2(a) formatter = get_formatter(destination['format'], doc) if not formatter: # if formatter not found then record it no_formatters.append(destination['format']) continue formatted_docs = formatter.format(doc, subscriber) for pub_seq_num, formatted_doc in formatted_docs: publish_queue_item = dict() publish_queue_item['item_id'] = doc['_id'] publish_queue_item['item_version'] = doc[config.VERSION] publish_queue_item['formatted_item'] = formatted_doc publish_queue_item['subscriber_id'] = subscriber['_id'] publish_queue_item['destination'] = destination publish_queue_item['published_seq_num'] = pub_seq_num publish_queue_item['publish_schedule'] = doc.get('publish_schedule', None) publish_queue_item['unique_name'] = doc.get('unique_name', None) publish_queue_item['content_type'] = doc.get('type', None) publish_queue_item['headline'] = doc.get('headline', None) self.set_state(doc, publish_queue_item) if publish_queue_item.get(ITEM_STATE): publish_queue_item['publishing_action'] = publish_queue_item.get(ITEM_STATE) del publish_queue_item[ITEM_STATE] else: publish_queue_item['publishing_action'] = self.published_state get_resource_service('publish_queue').post([publish_queue_item]) queued = True except: logger.exception("Failed to queue item for id {} with headline {} for subscriber {}." .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name'))) return no_formatters, queued except: raise def update_published_collection(self, published_item_id): """ Updates the published collection with the published item. Set the last_published_version to false for previous versions of the published items. :param: str published_item_id: _id of the document. 
""" published_item = super().find_one(req=None, _id=published_item_id) published_item = copy(published_item) get_resource_service('published').update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False) get_resource_service('published').post([published_item]) def conforms_content_filter(self, subscriber, doc): """ Checks if the document matches the subscriber filter :param subscriber: Subscriber to get the filter :param doc: Document to test the filter against :return: True if there's no filter True if matches and permitting False if matches and blocking False if doesn't match and permitting True if doesn't match and blocking """ content_filter = subscriber.get('content_filter') if content_filter is None or 'filter_id' not in content_filter or content_filter['filter_id'] is None: return True service = get_resource_service('content_filters') filter = service.find_one(req=None, _id=content_filter['filter_id']) does_match = service.does_match(filter, doc) if does_match: return content_filter['filter_type'] == 'permitting' else: return content_filter['filter_type'] == 'blocking' def conforms_global_filter(self, subscriber, global_filters, doc): """ Checks if subscriber has a override rule against each of the global filter and if not checks if document matches the global filter :param subscriber: Subscriber to get if the global filter is overriden :param global_filters: List of all global filters :param doc: Document to test the global filter against :return: True if at least one global filter is not overriden and it matches the document False if global filter matches the document or all of them overriden """ service = get_resource_service('content_filters') gfs = subscriber.get('global_filters', {}) for global_filter in global_filters: if gfs.get(str(global_filter['_id']), True): # Global filter applies to this subscriber if service.does_match(global_filter, doc): # All global filters behaves like blocking filters return False return True def _update_headline_sequence(self, doc): """ Updates the headline of the text story if there's any sequence value in it """ if doc.get(SEQUENCE): doc['headline'] = '{}={}'.format(doc['headline'], doc.get(SEQUENCE)) def _validate_package_contents(self, package, takes_package, validation_errors=[]): """ If the item passed is a package this function will ensure that the unpublished content validates and none of the content is locked by other than the publishing session, also do not allow any killed or spiked content :param package: :param takes_package: :param validation_errors: validation errors are appended if there are any. 
""" # Ensure it is the sort of thing we need to validate if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not takes_package and self.publish_type == ITEM_PUBLISH: items = self.package_service.get_residrefs(package) # make sure package is not scheduled or spiked if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED): validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE])) if package.get(EMBARGO): validation_errors.append('Package cannot have Embargo') if items: for guid in items: doc = super().find_one(req=None, _id=guid) if package[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: digital = self.takes_package_service.get_take_package(doc) or {} self._validate_package_contents(doc, digital, validation_errors) # make sure no items are killed or spiked or scheduled if doc[ITEM_STATE] in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED): validation_errors.append('Package cannot contain {} item'.format(doc[ITEM_STATE])) if doc.get(EMBARGO): validation_errors.append('Package cannot have Items with Embargo') # don't validate items that already have published if doc[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]: validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc} errors = get_resource_service('validate').post([validate_item], headline=True) if errors[0]: validation_errors.extend(errors[0]) # check the locks on the items if doc.get('lock_session', None) and package['lock_session'] != doc['lock_session']: validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])])
def format(self, article, subscriber, codes=None): try: docs = [] formatted_article = deepcopy(article) for category in self._get_category_list(formatted_article.get('anpa_category')): mapped_source = self._get_mapped_source(formatted_article) formatted_article[config.ID_FIELD] = formatted_article.get('item_id', formatted_article.get(config.ID_FIELD)) is_last_take = TakesPackageService().is_last_takes_package_item(formatted_article) is_first_part = formatted_article.get('sequence', 1) == 1 pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber) anpa = [] if codes: anpa.append(b'\x05') anpa.append(' '.join(codes).encode('ascii')) anpa.append(b'\x0D\x0A') # start of message header (syn syn soh) anpa.append(b'\x16\x16\x01') anpa.append(get_service_level(category, formatted_article).encode('ascii')) # story number anpa.append(str(pub_seq_num).zfill(4).encode('ascii')) # field seperator anpa.append(b'\x0A') # -LF anpa.append(map_priority(formatted_article.get('priority')).encode('ascii')) anpa.append(b'\x20') anpa.append(category['qcode'].lower().encode('ascii')) anpa.append(b'\x13') # format identifier if formatted_article.get(FORMAT, FORMATS.HTML) == FORMATS.PRESERVED: anpa.append(b'\x12') else: anpa.append(b'\x11') anpa.append(b'\x20') # keyword keyword = 'bc-{}'.format(self.append_legal(article=formatted_article, truncate=True)).replace(' ', '-') keyword = keyword[:24] if len(keyword) > 24 else keyword anpa.append(keyword.encode('ascii')) anpa.append(b'\x20') # version field anpa.append(b'\x20') # reference field anpa.append(b'\x20') # filing date anpa.append('{}-{}'.format(formatted_article['_updated'].strftime('%m'), formatted_article['_updated'].strftime('%d')).encode('ascii')) anpa.append(b'\x20') # add the word count anpa.append(str(formatted_article.get('word_count', '0000')).zfill(4).encode('ascii')) anpa.append(b'\x0D\x0A') anpa.append(b'\x02') # STX self._process_headline(anpa, formatted_article, category['qcode'].encode('ascii')) keyword = SluglineMapper().map(article=formatted_article, category=category['qcode'].upper(), truncate=True).encode('ascii', 'ignore') anpa.append(keyword) take_key = (formatted_article.get('anpa_take_key', '') or '').encode('ascii', 'ignore') anpa.append((b'\x20' + take_key) if len(take_key) > 0 else b'') anpa.append(b'\x0D\x0A') if formatted_article.get(EMBARGO): embargo = '{}{}\r\n'.format('Embargo Content. 
Timestamp: ', get_utc_schedule(formatted_article, EMBARGO).isoformat()) anpa.append(embargo.encode('ascii', 'replace')) if formatted_article.get('ednote', '') != '': ednote = '{}\r\n'.format(to_ascii(formatted_article.get('ednote'))) anpa.append(ednote.encode('ascii', 'replace')) if formatted_article.get(BYLINE): anpa.append(BeautifulSoup(formatted_article.get(BYLINE), 'html.parser').text.encode ('ascii', 'ignore')) anpa.append(b'\x0D\x0A') if formatted_article.get(FORMAT) == FORMATS.PRESERVED: soup = BeautifulSoup(self.append_body_footer(formatted_article), "html.parser") anpa.append(soup.get_text().encode('ascii', 'replace')) else: body = to_ascii(formatted_article.get('body_html', '')) # we need to inject the dateline if is_first_part and formatted_article.get('dateline', {}).get('text') \ and not article.get('auto_publish', False): soup = BeautifulSoup(body, "html.parser") ptag = soup.find('p') if ptag is not None: ptag.insert(0, NavigableString( '{} '.format(formatted_article.get('dateline').get('text')).encode('ascii', 'ignore'))) body = str(soup) anpa.append(self.get_text_content(body)) if formatted_article.get('body_footer'): anpa.append(self.get_text_content(to_ascii(formatted_article.get('body_footer', '')))) anpa.append(b'\x0D\x0A') if not is_last_take: anpa.append('MORE'.encode('ascii')) else: anpa.append(mapped_source.encode('ascii')) sign_off = (formatted_article.get('sign_off', '') or '').encode('ascii') anpa.append((b'\x20' + sign_off) if len(sign_off) > 0 else b'') anpa.append(b'\x0D\x0A') anpa.append(b'\x03') # ETX # time and date anpa.append(datetime.datetime.now().strftime('%d-%m-%y %H-%M-%S').encode('ascii')) anpa.append(b'\x04') # EOT anpa.append(b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A') docs.append({'published_seq_num': pub_seq_num, 'encoded_item': b''.join(anpa), 'formatted_item': b''.join(anpa).decode('ascii')}) return docs except Exception as ex: raise FormatterError.AnpaFormatterError(ex, subscriber)
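# Minimal sketch of the ANPA control-byte framing assembled by format() above,
# with dummy header values; the real output also carries service level,
# priority, category, keyword, date and word-count fields between SOH and STX.

import datetime

def _demo_anpa_frame(pub_seq_num=1, body=b'Sample story text.'):
    """Assemble a heavily simplified ANPA-style frame around a text body."""
    return b''.join([
        b'\x16\x16\x01',                            # SYN SYN SOH: start of header
        str(pub_seq_num).zfill(4).encode('ascii'),  # story/sequence number, zero padded
        b'\x0a',                                    # LF ends the header line
        b'\x02',                                    # STX: start of text
        body,
        b'\x03',                                    # ETX: end of text
        datetime.datetime.now().strftime('%d-%m-%y %H-%M-%S').encode('ascii'),
        b'\x04',                                    # EOT: end of transmission
    ])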
class ArchiveService(BaseService): packageService = PackageService() takesService = TakesPackageService() mediaService = ArchiveMediaService() def on_fetched(self, docs): """ Overriding this to handle existing data in Mongo & Elastic """ self.__enhance_items(docs[config.ITEMS]) def on_fetched_item(self, doc): self.__enhance_items([doc]) def __enhance_items(self, items): for item in items: handle_existing_data(item) self.takesService.enhance_with_package_info(item) def on_create(self, docs): on_create_item(docs) for doc in docs: doc['version_creator'] = doc['original_creator'] remove_unwanted(doc) update_word_count(doc) set_item_expiry({}, doc) if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_create([doc]) # Do the validation after Circular Reference check passes in Package Service self.validate_embargo(doc) if doc.get('media'): self.mediaService.on_create([doc]) # let client create version 0 docs if doc.get('version') == 0: doc[config.VERSION] = doc['version'] if not doc.get('ingest_provider'): doc['source'] = DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES def on_created(self, docs): packages = [ doc for doc in docs if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE ] if packages: self.packageService.on_created(packages) for doc in docs: subject = get_subject(doc) if subject: msg = 'added new {{ type }} item about "{{ subject }}"' else: msg = 'added new {{ type }} item with empty header/title' add_activity(ACTIVITY_CREATE, msg, self.datasource, item=doc, type=doc[ITEM_TYPE], subject=subject) push_content_notification(docs) def on_update(self, updates, original): updates[ITEM_OPERATION] = ITEM_UPDATE is_update_allowed(original) user = get_user() if 'publish_schedule' in updates and original['state'] == 'scheduled': # this is an deschedule action self.deschedule_item(updates, original) # check if there is a takes package and deschedule the takes package. 
package = TakesPackageService().get_take_package(original) if package and package.get('state') == 'scheduled': package_updates = { 'publish_schedule': None, 'groups': package.get('groups') } self.patch(package.get(config.ID_FIELD), package_updates) return if updates.get('publish_schedule'): if datetime.datetime.fromtimestamp(0).date() == updates.get( 'publish_schedule').date(): # publish_schedule field will be cleared updates['publish_schedule'] = None else: # validate the schedule if is_item_in_package(original): raise SuperdeskApiError.\ badRequestError(message='This item is in a package' + ' it needs to be removed before the item can be scheduled!') package = TakesPackageService().get_take_package( original) or {} validate_schedule(updates.get('publish_schedule'), package.get(SEQUENCE, 1)) if 'unique_name' in updates and not is_admin(user) \ and (user['active_privileges'].get('metadata_uniquename', 0) == 0): raise SuperdeskApiError.forbiddenError( "Unauthorized to modify Unique Name") remove_unwanted(updates) if self.__is_req_for_save(updates): update_state(original, updates) lock_user = original.get('lock_user', None) force_unlock = updates.get('force_unlock', False) updates.setdefault('original_creator', original.get('original_creator')) str_user_id = str(user.get('_id')) if user else None if lock_user and str(lock_user) != str_user_id and not force_unlock: raise SuperdeskApiError.forbiddenError( 'The item was locked by another user') updates['versioncreated'] = utcnow() set_item_expiry(updates, original) updates['version_creator'] = str_user_id set_sign_off(updates, original=original) update_word_count(updates) if force_unlock: del updates['force_unlock'] # create crops crop_service = ArchiveCropService() crop_service.validate_multiple_crops(updates, original) crop_service.create_multiple_crops(updates, original) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_update(updates, original) update_version(updates, original) # Do the validation after Circular Reference check passes in Package Service updated = original.copy() updated.update(updates) self.validate_embargo(updated) def on_updated(self, updates, original): get_component(ItemAutosave).clear(original['_id']) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_updated(updates, original) ArchiveCropService().delete_replaced_crop_files(updates, original) updated = copy(original) updated.update(updates) if config.VERSION in updates: add_activity( ACTIVITY_UPDATE, 'created new version {{ version }} for item {{ type }} about "{{ subject }}"', self.datasource, item=updated, version=updates[config.VERSION], subject=get_subject(updates, original), type=updated[ITEM_TYPE]) push_content_notification([updated, original]) def on_replace(self, document, original): document[ITEM_OPERATION] = ITEM_UPDATE remove_unwanted(document) user = get_user() lock_user = original.get('lock_user', None) force_unlock = document.get('force_unlock', False) user_id = str(user.get('_id')) if lock_user and str(lock_user) != user_id and not force_unlock: raise SuperdeskApiError.forbiddenError( 'The item was locked by another user') document['versioncreated'] = utcnow() set_item_expiry(document, original) document['version_creator'] = user_id if force_unlock: del document['force_unlock'] def on_replaced(self, document, original): get_component(ItemAutosave).clear(original['_id']) add_activity(ACTIVITY_UPDATE, 'replaced item {{ type }} about {{ subject }}', self.datasource, item=original, type=original['type'], 
subject=get_subject(original)) push_content_notification([document, original]) def on_deleted(self, doc): if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_deleted(doc) remove_media_files(doc) add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}', self.datasource, item=doc, type=doc[ITEM_TYPE], subject=get_subject(doc)) push_content_notification([doc]) def replace(self, id, document, original): return self.restore_version(id, document, original) or super().replace(id, document, original) def find_one(self, req, **lookup): item = super().find_one(req, **lookup) if item and str(item.get('task', {}).get('stage', '')) in \ get_resource_service('users').get_invisible_stages_ids(get_user().get('_id')): raise SuperdeskApiError.forbiddenError("User does not have permissions to read the item.") handle_existing_data(item) return item def restore_version(self, id, doc, original): item_id = id old_version = int(doc.get('old_version', 0)) last_version = int(doc.get('last_version', 0)) if (not all([item_id, old_version, last_version])): return None old = get_resource_service('archive_versions').find_one(req=None, _id_document=item_id, _current_version=old_version) if old is None: raise SuperdeskApiError.notFoundError('Invalid version %s' % old_version) curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id) if curr is None: raise SuperdeskApiError.notFoundError('Invalid item id %s' % item_id) if curr[config.VERSION] != last_version: raise SuperdeskApiError.preconditionFailedError('Invalid last version %s' % last_version) old['_id'] = old['_id_document'] old['_updated'] = old['versioncreated'] = utcnow() set_item_expiry(old, doc) del old['_id_document'] old[ITEM_OPERATION] = ITEM_RESTORE resolve_document_version(old, SOURCE, 'PATCH', curr) remove_unwanted(old) set_sign_off(updates=old, original=curr) super().replace(id=item_id, document=old, original=curr) del doc['old_version'] del doc['last_version'] doc.update(old) return item_id def duplicate_content(self, original_doc): """ Duplicates the 'original_doc' including its version history. Copy and Duplicate actions use this method. :return: guid of the duplicated article """ if original_doc.get(ITEM_TYPE, '') == CONTENT_TYPE.COMPOSITE: for groups in original_doc.get('groups'): if groups.get('id') != 'root': associations = groups.get('refs', []) for assoc in associations: if assoc.get(RESIDREF): item, _item_id, _endpoint = self.packageService.get_associated_item(assoc) assoc[RESIDREF] = assoc['guid'] = self.duplicate_content(item) return self._duplicate_item(original_doc) def _duplicate_item(self, original_doc): """ Duplicates the 'original_doc' including its version history. If the article being duplicated is contained in a desk then the article state is changed to Submitted. :return: guid of the duplicated article """ new_doc = original_doc.copy() self._remove_after_copy(new_doc) new_doc[ITEM_OPERATION] = ITEM_DUPLICATE item_model = get_model(ItemModel) on_duplicate_item(new_doc) resolve_document_version(new_doc, SOURCE, 'PATCH', new_doc) if original_doc.get('task', {}).get('desk') is not None and new_doc.get('state') != 'submitted': new_doc[ITEM_STATE] = CONTENT_STATE.SUBMITTED item_model.create([new_doc]) self._duplicate_versions(original_doc['guid'], new_doc) return new_doc['guid'] def _remove_after_copy(self, copied_item): """ Removes the properties which don't make sense to have for an item after copy.
""" del copied_item[config.ID_FIELD] del copied_item['guid'] copied_item.pop(LINKED_IN_PACKAGES, None) copied_item.pop(EMBARGO, None) copied_item.pop('publish_schedule', None) def _duplicate_versions(self, old_id, new_doc): """ Duplicates the version history of the article identified by old_id. The identifiers in each version are changed to those of new_doc. :param old_id: identifier to fetch version history :param new_doc: identifiers from this doc will be used to create version history for the duplicated item. """ resource_def = app.config['DOMAIN']['archive'] version_id = versioned_id_field(resource_def) old_versions = get_resource_service('archive_versions').get(req=None, lookup={'guid': old_id}) new_versions = [] for old_version in old_versions: old_version[version_id] = new_doc[config.ID_FIELD] del old_version[config.ID_FIELD] old_version['guid'] = new_doc['guid'] old_version['unique_name'] = new_doc['unique_name'] old_version['unique_id'] = new_doc['unique_id'] old_version['versioncreated'] = utcnow() if old_version[VERSION] == new_doc[VERSION]: old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION] new_versions.append(old_version) last_version = deepcopy(new_doc) last_version['_id_document'] = new_doc['_id'] del last_version['_id'] new_versions.append(last_version) if new_versions: get_resource_service('archive_versions').post(new_versions) def deschedule_item(self, updates, doc): """ Deschedule an item. This operation removes the item from publish queue and published collection. :param dict updates: updates for the document :param doc: original document. """ updates['state'] = 'in_progress' updates['publish_schedule'] = None updates[ITEM_OPERATION] = ITEM_DESCHEDULE # delete entries from publish queue get_resource_service('publish_queue').delete_by_article_id(doc['_id']) # delete entry from published repo get_resource_service('published').delete_by_article_id(doc['_id']) def validate_schedule(self, schedule): if not isinstance(schedule, datetime.date): raise SuperdeskApiError.badRequestError("Schedule date is not recognized") if not schedule.date() or schedule.date().year <= 1970: raise SuperdeskApiError.badRequestError("Schedule date is not recognized") if not schedule.time(): raise SuperdeskApiError.badRequestError("Schedule time is not recognized") if schedule < utcnow(): raise SuperdeskApiError.badRequestError("Schedule cannot be earlier than now") def can_edit(self, item, user_id): """ Determines if the user can edit the item or not. """ # TODO: modify this function when read only permissions for stages are implemented # TODO: and Content state related checking. if not current_user_has_privilege('archive'): return False, 'User does not have sufficient permissions.' item_location = item.get('task') if item_location: if item_location.get('desk'): if not superdesk.get_resource_service('user_desks').is_member(user_id, item_location.get('desk')): return False, 'User is not a member of the desk.' elif item_location.get('user'): if not str(item_location.get('user')) == str(user_id): return False, 'Item belongs to another user.' return True, '' def remove_expired(self, doc): """ Removes the article from production if the state is spiked """ assert doc[ITEM_STATE] == CONTENT_STATE.SPIKED, \ "Article state is %s. Only Spiked Articles can be removed" % doc[ITEM_STATE]
doc_id = str(doc[config.ID_FIELD]) resource_def = app.config['DOMAIN']['archive_versions'] get_resource_service('archive_versions').delete(lookup={versioned_id_field(resource_def): doc_id}) super().delete_action({config.ID_FIELD: doc_id}) def __is_req_for_save(self, doc): """ Patch of /api/archive is being used in multiple places. This method differentiates patches triggered by a user from those triggered programmatically. """ if 'req_for_save' in doc: req_for_save = doc['req_for_save'] del doc['req_for_save'] return req_for_save == 'true' return True def validate_embargo(self, item): """ Validates the embargo of the item. Following are checked: 1. Item can't be a package or a take or a re-write of another story 2. Publish Schedule and Embargo are mutually exclusive 3. Always a future date except in case of Corrected and Killed. :raises: SuperdeskApiError.badRequestError() if the validation fails """ if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE: embargo = item.get(EMBARGO) if embargo: if item.get('publish_schedule') or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED: raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo") package = TakesPackageService().get_take_package(item) if package: raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo") if item.get('rewrite_of'): raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo") if not isinstance(embargo, datetime.date) or not embargo.time(): raise SuperdeskApiError.badRequestError("Invalid Embargo") if item[ITEM_STATE] not in PUBLISH_STATES and embargo <= utcnow(): raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now") elif item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and not self.takesService.is_takes_package(item): if item.get(EMBARGO): raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo") self.packageService.check_if_any_item_in_package_has_embargo(item)
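# Illustrative sketch, not part of the service above: the 'req_for_save'
# flag is a small client protocol on PATCH /api/archive. A standalone
# version of __is_req_for_save() (the name is_req_for_save is hypothetical)
# makes the contract easy to test: the flag is popped from the doc, 'true'
# means a user-initiated save, and a missing flag defaults to a user save.
def is_req_for_save(doc):
    if 'req_for_save' in doc:
        return doc.pop('req_for_save') == 'true'
    return True

assert is_req_for_save({'req_for_save': 'true', 'slugline': 'x'}) is True
assert is_req_for_save({'req_for_save': 'false'}) is False
assert is_req_for_save({'slugline': 'x'}) is True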
def update_rewrite(self, original): """Removes the reference from the rewritten story in published collection.""" rewrite_service = ArchiveRewriteService() if original.get('rewrite_of') and original.get(ITEM_EVENT_ID): rewrite_service._clear_rewritten_flag(original.get(ITEM_EVENT_ID), original[config.ID_FIELD], 'rewritten_by') # write the rewritten_by to the take before it is spiked archive_service = get_resource_service(ARCHIVE) published_service = get_resource_service('published') takes_service = TakesPackageService() takes_package = takes_service.get_take_package(original) if takes_package and takes_package.get(SEQUENCE, 0) > 1 and original.get('rewritten_by'): # get the rewritten by rewritten_by = archive_service.find_one(req=None, _id=original.get('rewritten_by')) # get the take take_id = takes_service.get_take_by_take_no(original, take_no=takes_package.get(SEQUENCE) - 1, package=takes_package) take = archive_service.find_one(req=None, _id=take_id) # update the take and takes package with rewritten_by if take.get('rewritten_by') != rewritten_by[config.ID_FIELD]: if take.get(ITEM_STATE) in PUBLISH_STATES: published_service.update_published_items(take_id, 'rewritten_by', rewritten_by[config.ID_FIELD]) archive_service.system_update(take[config.ID_FIELD], {'rewritten_by': rewritten_by[config.ID_FIELD]}, take) if takes_package.get('rewritten_by') != rewritten_by[config.ID_FIELD]: if takes_package.get(ITEM_STATE) in PUBLISH_STATES: published_service.update_published_items(takes_package.get(config.ID_FIELD), 'rewritten_by', rewritten_by[config.ID_FIELD]) archive_service.system_update(takes_package[config.ID_FIELD], {'rewritten_by': rewritten_by[config.ID_FIELD]}, takes_package) if rewritten_by.get('rewrite_of') != takes_package.get(config.ID_FIELD): archive_service.system_update(rewritten_by[config.ID_FIELD], {'rewrite_of': takes_package.get(config.ID_FIELD)}, rewritten_by) elif original.get('rewritten_by') or (takes_package and takes_package.get('rewritten_by')): # you are spiking the story from which the rewrite was triggered. # in this case both rewrite_of and rewritten_by are published. rewrite_id = original.get('rewritten_by') or takes_package.get('rewritten_by') rewritten_by = archive_service.find_one(req=None, _id=rewrite_id) archive_service.system_update(rewrite_id, { 'rewrite_of': None, 'rewrite_sequence': 0 }, rewritten_by) app.on_archive_item_updated( { 'rewrite_of': None, 'rewrite_sequence': 0 }, original, ITEM_UNLINK)
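# Illustrative sketch with plain dicts (assumed data shapes, not the real
# resource layer): update_rewrite() above keeps rewrite links symmetric, so
# when the source story goes away the rewrite's back-pointers are reset.
def clear_rewrite_links(spiked, items_by_id):
    rewrite_id = spiked.get('rewritten_by')
    if rewrite_id and rewrite_id in items_by_id:
        items_by_id[rewrite_id].update({'rewrite_of': None, 'rewrite_sequence': 0})

items = {'b': {'_id': 'b', 'rewrite_of': 'a', 'rewrite_sequence': 1}}
clear_rewrite_links({'_id': 'a', 'rewritten_by': 'b'}, items)
assert items['b'] == {'_id': 'b', 'rewrite_of': None, 'rewrite_sequence': 0}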
class ArchiveService(BaseService): packageService = PackageService() takesService = TakesPackageService() mediaService = ArchiveMediaService() def on_fetched(self, docs): """ Overriding this to handle existing data in Mongo & Elastic """ self.__enhance_items(docs[config.ITEMS]) def on_fetched_item(self, doc): self.__enhance_items([doc]) def __enhance_items(self, items): for item in items: handle_existing_data(item) self.takesService.enhance_with_package_info(item) def on_create(self, docs): on_create_item(docs) for doc in docs: if doc.get('body_footer') and is_normal_package(doc): raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements") doc['version_creator'] = doc['original_creator'] remove_unwanted(doc) update_word_count(doc) set_item_expiry({}, doc) if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_create([doc]) # Do the validation after Circular Reference check passes in Package Service self.validate_embargo(doc) if doc.get('media'): self.mediaService.on_create([doc]) # let client create version 0 docs if doc.get('version') == 0: doc[config.VERSION] = doc['version'] if not doc.get('ingest_provider'): doc['source'] = DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES doc.setdefault('priority', DEFAULT_PRIORITY_VALUE_FOR_MANUAL_ARTICLES) doc.setdefault('urgency', DEFAULT_URGENCY_VALUE_FOR_MANUAL_ARTICLES) convert_task_attributes_to_objectId(doc) def on_created(self, docs): packages = [doc for doc in docs if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE] if packages: self.packageService.on_created(packages) for doc in docs: subject = get_subject(doc) if subject: msg = 'added new {{ type }} item about "{{ subject }}"' else: msg = 'added new {{ type }} item with empty header/title' add_activity(ACTIVITY_CREATE, msg, self.datasource, item=doc, type=doc[ITEM_TYPE], subject=subject) push_content_notification(docs) def on_update(self, updates, original): """ Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief, it does the following: 1. Sets state, item operation, version created, version creator, sign off and word count. 2. Resets Item Expiry 3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also. 4. Creates Crops if article is a picture """ user = get_user() self._validate_updates(original, updates, user) if 'publish_schedule' in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED: self.deschedule_item(updates, original) # this is a deschedule action # check if there is a takes package and deschedule the takes package.
package = TakesPackageService().get_take_package(original) if package and package.get('state') == 'scheduled': package_updates = {'publish_schedule': None, 'groups': package.get('groups')} self.patch(package.get(config.ID_FIELD), package_updates) return if self.__is_req_for_save(updates): update_state(original, updates) remove_unwanted(updates) self._add_system_updates(original, updates, user) if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE: # create crops CropService().create_multiple_crops(updates, original) def on_updated(self, updates, original): get_component(ItemAutosave).clear(original['_id']) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_updated(updates, original) CropService().delete_replaced_crop_files(updates, original) updated = copy(original) updated.update(updates) if config.VERSION in updates: add_activity(ACTIVITY_UPDATE, 'created new version {{ version }} for item {{ type }} about "{{ subject }}"', self.datasource, item=updated, version=updates[config.VERSION], subject=get_subject(updates, original), type=updated[ITEM_TYPE]) push_content_notification([updated, original]) get_resource_service('archive_broadcast').reset_broadcast_status(updates, original) def on_replace(self, document, original): document[ITEM_OPERATION] = ITEM_UPDATE remove_unwanted(document) user = get_user() lock_user = original.get('lock_user', None) force_unlock = document.get('force_unlock', False) user_id = str(user.get('_id')) if lock_user and str(lock_user) != user_id and not force_unlock: raise SuperdeskApiError.forbiddenError('The item was locked by another user') document['versioncreated'] = utcnow() set_item_expiry(document, original) document['version_creator'] = user_id if force_unlock: del document['force_unlock'] def on_replaced(self, document, original): get_component(ItemAutosave).clear(original['_id']) add_activity(ACTIVITY_UPDATE, 'replaced item {{ type }} about {{ subject }}', self.datasource, item=original, type=original['type'], subject=get_subject(original)) push_content_notification([document, original]) def on_deleted(self, doc): if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_deleted(doc) remove_media_files(doc) add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}', self.datasource, item=doc, type=doc[ITEM_TYPE], subject=get_subject(doc)) push_content_notification([doc]) def replace(self, id, document, original): return self.restore_version(id, document, original) or super().replace(id, document, original) def find_one(self, req, **lookup): item = super().find_one(req, **lookup) if item and str(item.get('task', {}).get('stage', '')) in \ get_resource_service('users').get_invisible_stages_ids(get_user().get('_id')): raise SuperdeskApiError.forbiddenError("User does not have permissions to read the item.") handle_existing_data(item) return item def restore_version(self, id, doc, original): item_id = id old_version = int(doc.get('old_version', 0)) last_version = int(doc.get('last_version', 0)) if (not all([item_id, old_version, last_version])): return None old = get_resource_service('archive_versions').find_one(req=None, _id_document=item_id, _current_version=old_version) if old is None: raise SuperdeskApiError.notFoundError('Invalid version %s' % old_version) curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id) if curr is None: raise SuperdeskApiError.notFoundError('Invalid item id %s' % item_id) if curr[config.VERSION] != last_version: raise SuperdeskApiError.preconditionFailedError('Invalid last version %s' % last_version)
old['_id'] = old['_id_document'] old['_updated'] = old['versioncreated'] = utcnow() set_item_expiry(old, doc) del old['_id_document'] old[ITEM_OPERATION] = ITEM_RESTORE resolve_document_version(old, SOURCE, 'PATCH', curr) remove_unwanted(old) set_sign_off(updates=old, original=curr) super().replace(id=item_id, document=old, original=curr) del doc['old_version'] del doc['last_version'] doc.update(old) return item_id def duplicate_content(self, original_doc): """ Duplicates the 'original_doc' including its version history. Copy and Duplicate actions use this method. :return: guid of the duplicated article """ if original_doc.get(ITEM_TYPE, '') == CONTENT_TYPE.COMPOSITE: for groups in original_doc.get('groups'): if groups.get('id') != 'root': associations = groups.get('refs', []) for assoc in associations: if assoc.get(RESIDREF): item, _item_id, _endpoint = self.packageService.get_associated_item(assoc) assoc[RESIDREF] = assoc['guid'] = self.duplicate_content(item) return self._duplicate_item(original_doc) def _duplicate_item(self, original_doc): """ Duplicates the 'original_doc' including its version history. If the article being duplicated is contained in a desk then the article state is changed to Submitted. :return: guid of the duplicated article """ new_doc = original_doc.copy() self._remove_after_copy(new_doc) on_duplicate_item(new_doc) resolve_document_version(new_doc, SOURCE, 'PATCH', new_doc) if original_doc.get('task', {}).get('desk') is not None and new_doc.get('state') != 'submitted': new_doc[ITEM_STATE] = CONTENT_STATE.SUBMITTED convert_task_attributes_to_objectId(new_doc) get_model(ItemModel).create([new_doc]) self._duplicate_versions(original_doc['guid'], new_doc) return new_doc['guid'] def _remove_after_copy(self, copied_item): """ Removes the properties which don't make sense to have for an item after copy. """ del copied_item[config.ID_FIELD] del copied_item['guid'] copied_item.pop(LINKED_IN_PACKAGES, None) copied_item.pop(EMBARGO, None) copied_item.pop('publish_schedule', None) copied_item.pop('lock_time', None) copied_item.pop('lock_session', None) copied_item.pop('lock_user', None) task = copied_item.get('task', {}) task.pop(LAST_PRODUCTION_DESK, None) task.pop(LAST_AUTHORING_DESK, None) def _duplicate_versions(self, old_id, new_doc): """ Duplicates the version history of the article identified by old_id. The identifiers in each version are changed to those of new_doc. :param old_id: identifier to fetch version history :param new_doc: identifiers from this doc will be used to create version history for the duplicated item. """ resource_def = app.config['DOMAIN']['archive'] version_id = versioned_id_field(resource_def) old_versions = get_resource_service('archive_versions').get(req=None, lookup={'guid': old_id}) new_versions = [] for old_version in old_versions: old_version[version_id] = new_doc[config.ID_FIELD] del old_version[config.ID_FIELD] old_version['guid'] = new_doc['guid'] old_version['unique_name'] = new_doc['unique_name'] old_version['unique_id'] = new_doc['unique_id'] old_version['versioncreated'] = utcnow() if old_version[VERSION] == new_doc[VERSION]: old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION] new_versions.append(old_version) last_version = deepcopy(new_doc) last_version['_id_document'] = new_doc['_id'] del last_version['_id'] new_versions.append(last_version) if new_versions: get_resource_service('archive_versions').post(new_versions) def deschedule_item(self, updates, doc): """ Deschedule an item.
This operation removes the item from publish queue and published collection. :param dict updates: updates for the document :param doc: original document. """ updates['state'] = 'in_progress' updates['publish_schedule'] = None updates[ITEM_OPERATION] = ITEM_DESCHEDULE # delete entry from published repo get_resource_service('published').delete_by_article_id(doc['_id']) def validate_schedule(self, schedule): if not isinstance(schedule, datetime.date): raise SuperdeskApiError.badRequestError("Schedule date is not recognized") if not schedule.date() or schedule.date().year <= 1970: raise SuperdeskApiError.badRequestError("Schedule date is not recognized") if not schedule.time(): raise SuperdeskApiError.badRequestError("Schedule time is not recognized") if schedule < utcnow(): raise SuperdeskApiError.badRequestError("Schedule cannot be earlier than now") def can_edit(self, item, user_id): """ Determines if the user can edit the item or not. """ # TODO: modify this function when read only permissions for stages are implemented # TODO: and Content state related checking. if not current_user_has_privilege('archive'): return False, 'User does not have sufficient permissions.' item_location = item.get('task') if item_location: if item_location.get('desk'): if not superdesk.get_resource_service('user_desks').is_member(user_id, item_location.get('desk')): return False, 'User is not a member of the desk.' elif item_location.get('user'): if not str(item_location.get('user')) == str(user_id): return False, 'Item belongs to another user.' return True, '' def delete_by_article_ids(self, ids): """ Remove the content. :param list ids: list of ids to be removed """ version_field = versioned_id_field(app.config['DOMAIN']['archive_versions']) get_resource_service('archive_versions').delete(lookup={version_field: {'$in': ids}}) super().delete_action({config.ID_FIELD: {'$in': ids}}) def __is_req_for_save(self, doc): """ Patch of /api/archive is being used in multiple places. This method differentiates patches triggered by a user from those triggered programmatically. """ if 'req_for_save' in doc: req_for_save = doc['req_for_save'] del doc['req_for_save'] return req_for_save == 'true' return True def validate_embargo(self, item): """ Validates the embargo of the item. Following are checked: 1. Item can't be a package or a take or a re-write of another story 2. Publish Schedule and Embargo are mutually exclusive 3. Always a future date except in case of Corrected and Killed.
:raises: SuperdeskApiError.badRequestError() if the validation fails """ if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE: embargo = item.get(EMBARGO) if embargo: if item.get('publish_schedule') or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED: raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo") package = TakesPackageService().get_take_package(item) if package: raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo") if item.get('rewrite_of'): raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo") if not isinstance(embargo, datetime.date) or not embargo.time(): raise SuperdeskApiError.badRequestError("Invalid Embargo") if item[ITEM_STATE] not in PUBLISH_STATES and embargo <= utcnow(): raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now") elif is_normal_package(item): if item.get(EMBARGO): raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo") self.packageService.check_if_any_item_in_package_has_embargo(item) def _validate_updates(self, original, updates, user): """ Validates updates to the article for the below conditions. If any of these conditions are met then an exception is raised: 1. Is the article locked by a user other than the one requesting the update? 2. Is the state of the article Killed? 3. Is the user trying to update the package with Public Service Announcements? 4. Is the user authorized to update the unique name of the article? 5. Is the user trying to update the genre of a broadcast article? 6. Is the article being scheduled while it is in a package? 7. Is the article being scheduled with an invalid schedule timestamp? 8. Does the article have valid crops if it is a picture? 9. Is the article a valid package if its type is a package? 10. Does the article have a valid Embargo? 11. Make sure that there are no duplicate anpa_category codes in the article. 12. Make sure there are no duplicate subjects in the update. :raises: SuperdeskApiError.forbiddenError() - if state of the article is killed or user is not authorized to update unique name or if article is locked by another user SuperdeskApiError.badRequestError() - if Public Service Announcements are being added to a package or genre is being updated for a broadcast, is invalid for scheduling, the updates contain duplicate anpa_category or subject codes """ lock_user = original.get('lock_user', None) force_unlock = updates.get('force_unlock', False) str_user_id = str(user.get(config.ID_FIELD)) if user else None if lock_user and str(lock_user) != str_user_id and not force_unlock: raise SuperdeskApiError.forbiddenError('The item was locked by another user') if original.get(ITEM_STATE) == CONTENT_STATE.KILLED: raise SuperdeskApiError.forbiddenError("Item isn't in a valid state to be updated.") if updates.get('body_footer') and is_normal_package(original): raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements") if 'unique_name' in updates and not is_admin(user) \ and (user['active_privileges'].get('metadata_uniquename', 0) == 0): raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name") # if broadcast then update to genre is not allowed.
if original.get('broadcast') and updates.get('genre') and \ any(genre.get('value', '').lower() != BROADCAST_GENRE.lower() for genre in updates.get('genre')): raise SuperdeskApiError.badRequestError('Cannot change the genre for broadcast content.') if updates.get('publish_schedule') and original[ITEM_STATE] != CONTENT_STATE.SCHEDULED \ and datetime.datetime.fromtimestamp(0).date() != updates['publish_schedule'].date(): if is_item_in_package(original): raise SuperdeskApiError.badRequestError( 'This item is in a package and it needs to be removed before the item can be scheduled!') package = TakesPackageService().get_take_package(original) or {} validate_schedule(updates['publish_schedule'], package.get(SEQUENCE, 1)) if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE: CropService().validate_multiple_crops(updates, original) elif original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_update(updates, original) # Do the validation after Circular Reference check passes in Package Service updated = original.copy() updated.update(updates) self.validate_embargo(updated) # Ensure that there are no duplicate categories in the update category_qcodes = [q['qcode'] for q in updates.get('anpa_category', []) or []] if category_qcodes and len(category_qcodes) != len(set(category_qcodes)): raise SuperdeskApiError.badRequestError("Duplicate category codes are not allowed") # Ensure that there are no duplicate subjects in the update subject_qcodes = [q['qcode'] for q in updates.get('subject', []) or []] if subject_qcodes and len(subject_qcodes) != len(set(subject_qcodes)): raise SuperdeskApiError.badRequestError("Duplicate subjects are not allowed") def _add_system_updates(self, original, updates, user): """ As the name suggests, this method adds properties which are derived based on updates sent in the request. 1. Sets item operation, version created, version creator, sign off and word count. 2. Resets Item Expiry """ convert_task_attributes_to_objectId(updates) updates[ITEM_OPERATION] = ITEM_UPDATE updates.setdefault('original_creator', original.get('original_creator')) updates['versioncreated'] = utcnow() updates['version_creator'] = str(user.get(config.ID_FIELD)) if user else None update_word_count(updates) update_version(updates, original) set_item_expiry(updates, original) set_sign_off(updates, original=original) # Clear publish_schedule field if updates.get('publish_schedule') \ and datetime.datetime.fromtimestamp(0).date() == updates.get('publish_schedule').date(): updates['publish_schedule'] = None if updates.get('force_unlock', False): del updates['force_unlock'] def get_expired_items(self, expiry_datetime): """ Get the expired items where the content state is not scheduled. :param datetime expiry_datetime: expiry datetime :return pymongo.cursor: expired non-published items. """ query = { '$and': [ {'expiry': {'$lte': date_to_str(expiry_datetime)}}, {'$or': [ {'task.desk': {'$ne': None}}, {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None} ]} ] } req = ParsedRequest() req.max_results = config.MAX_EXPIRY_QUERY_LIMIT req.sort = 'expiry,_created' return self.get_from_mongo(req=req, lookup=query)
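# Illustrative sketch of the expiry query shape used by get_expired_items()
# above; 'state' and the ISO string stand in for ITEM_STATE/date_to_str.
# Desk content always qualifies once expired, while personal (deskless)
# content qualifies only when it was spiked.
import datetime

def build_expiry_query(expiry_datetime):
    return {'$and': [
        {'expiry': {'$lte': expiry_datetime.isoformat()}},
        {'$or': [
            {'task.desk': {'$ne': None}},
            {'state': 'spiked', 'task.desk': None},
        ]},
    ]}

print(build_expiry_query(datetime.datetime(2016, 1, 1)))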
class ArchiveLinkService(Service): packageService = TakesPackageService() def create(self, docs, **kwargs): target_id = request.view_args['target_id'] doc = docs[0] link_id = doc.get('link_id') desk_id = doc.get('desk') service = get_resource_service(ARCHIVE) target = service.find_one(req=None, _id=target_id) self._validate_link(target, target_id) link = {} if desk_id: link = {'task': {'desk': desk_id}} user = get_user() lookup = {'_id': desk_id, 'members.user': user['_id']} desk = get_resource_service('desks').find_one(req=None, **lookup) if not desk: raise SuperdeskApiError.forbiddenError("No privileges to create new take on requested desk.") link['task']['stage'] = desk['working_stage'] if link_id: link = service.find_one(req=None, _id=link_id) linked_item = self.packageService.link_as_next_take(target, link) insert_into_versions(id_=linked_item[config.ID_FIELD]) doc.update(linked_item) build_custom_hateoas(CUSTOM_HATEOAS, doc) return [linked_item['_id']] def _validate_link(self, target, target_id): """Validates the article to be linked. :param target: article to be linked :param target_id: id of the article to be linked :raises: SuperdeskApiError """ if not target: raise SuperdeskApiError.notFoundError(message='Cannot find the target item with id {}.'.format(target_id)) if target.get(EMBARGO): raise SuperdeskApiError.badRequestError("Takes can't be created for an Item having Embargo") if is_genre(target, BROADCAST_GENRE): raise SuperdeskApiError.badRequestError("Cannot add new take to the story with genre as broadcast.") if get_resource_service('published').is_rewritten_before(target['_id']): raise SuperdeskApiError.badRequestError(message='Article has been rewritten before!') def _validate_unlink(self, target): """Validates that the links for takes or updates can be removed.
:param target: article whose links will be removed :raises: SuperdeskApiError """ if target[ITEM_TYPE] != CONTENT_TYPE.TEXT: raise SuperdeskApiError.badRequestError("Only text stories can be unlinked!") # if the story is in published states then it cannot be unlinked if target[ITEM_STATE] in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]: raise SuperdeskApiError.badRequestError("Published stories cannot be unlinked!") # if the story is not the last take then it cannot be unlinked if TakesPackageService().get_take_package(target) and \ not TakesPackageService().is_last_takes_package_item(target): raise SuperdeskApiError.badRequestError("Only the last take can be unlinked!") def on_delete(self, doc): self._validate_unlink(doc) def delete(self, lookup): target_id = request.view_args['target_id'] archive_service = get_resource_service(ARCHIVE) target = archive_service.find_one(req=None, _id=target_id) self._validate_unlink(target) updates = {} takes_package = TakesPackageService().get_take_package(target) if takes_package and TakesPackageService().is_last_takes_package_item(target): # remove the take link PackageService().remove_refs_in_package(takes_package, target_id) if target.get('rewrite_of'): # remove the rewrite info ArchiveSpikeService().update_rewrite(target) if not takes_package and not target.get('rewrite_of'): # there is nothing to do raise SuperdeskApiError.badRequestError("Only takes and updates can be unlinked!") if target.get('rewrite_of'): updates['rewrite_of'] = None if target.get('anpa_take_key'): updates['anpa_take_key'] = None if target.get('rewrite_sequence'): updates['rewrite_sequence'] = None if target.get('sequence'): updates['sequence'] = None updates['event_id'] = generate_guid(type=GUID_TAG) archive_service.system_update(target_id, updates, target) user = get_user(required=True) push_notification('item:unlink', item=target_id, user=str(user.get(config.ID_FIELD))) app.on_archive_item_updated(updates, target, ITEM_UNLINK)
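# Illustrative sketch of the unlink rules enforced by _validate_unlink()
# above, as a standalone function over plain dicts; 'in_takes_package' and
# 'is_last_take' are caller-supplied stand-ins for TakesPackageService.
def validate_unlink(item, in_takes_package, is_last_take):
    if item.get('type') != 'text':
        raise ValueError('Only text stories can be unlinked!')
    if item.get('state') in ('published', 'corrected', 'killed'):
        raise ValueError('Published stories cannot be unlinked!')
    if in_takes_package and not is_last_take:
        raise ValueError('Only the last take can be unlinked!')

validate_unlink({'type': 'text', 'state': 'in_progress'}, True, True)  # passes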
class ArchiveService(BaseService): packageService = PackageService() takesService = TakesPackageService() mediaService = ArchiveMediaService() cropService = CropService() def on_fetched(self, docs): """ Overriding this to handle existing data in Mongo & Elastic """ self.enhance_items(docs[config.ITEMS]) def on_fetched_item(self, doc): self.enhance_items([doc]) def enhance_items(self, items): for item in items: handle_existing_data(item) if not app.config.get('NO_TAKES', False): self.takesService.enhance_items_with_takes_packages(items) def on_create(self, docs): on_create_item(docs) for doc in docs: if doc.get('body_footer') and is_normal_package(doc): raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements") doc['version_creator'] = doc['original_creator'] remove_unwanted(doc) update_word_count(doc) set_item_expiry({}, doc) if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_create([doc]) # Do the validation after Circular Reference check passes in Package Service update_schedule_settings(doc, EMBARGO, doc.get(EMBARGO)) self.validate_embargo(doc) if doc.get('media'): self.mediaService.on_create([doc]) # let client create version 0 docs if doc.get('version') == 0: doc[config.VERSION] = doc['version'] self._add_desk_metadata(doc, {}) convert_task_attributes_to_objectId(doc) def on_created(self, docs): packages = [doc for doc in docs if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE] if packages: self.packageService.on_created(packages) profiles = set() for doc in docs: subject = get_subject(doc) if subject: msg = 'added new {{ type }} item about "{{ subject }}"' else: msg = 'added new {{ type }} item with empty header/title' add_activity(ACTIVITY_CREATE, msg, self.datasource, item=doc, type=doc[ITEM_TYPE], subject=subject) if doc.get('profile'): profiles.add(doc['profile']) self.cropService.update_media_references(doc, {}) if doc[ITEM_OPERATION] == ITEM_FETCH: app.on_archive_item_updated({'task': doc.get('task')}, doc, ITEM_FETCH) else: app.on_archive_item_updated({'task': doc.get('task')}, doc, ITEM_CREATE) get_resource_service('content_types').set_used(profiles) push_content_notification(docs) def on_update(self, updates, original): """Runs on archive update. Overridden to validate the updates to the article and take necessary actions depending on the updates. In brief, it does the following: 1. Sets state, item operation, version created, version creator, sign off and word count. 2. Resets Item Expiry 3. If the request is to de-schedule then checks and de-schedules the associated Takes Package also. 4. Creates Crops if article is a picture """ user = get_user() self._validate_updates(original, updates, user) if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED: # check if there is a takes package and deschedule the takes package. 
takes_service = TakesPackageService() package = takes_service.get_take_package(original) if package and package.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED: get_resource_service('published').delete_by_article_id(package.get(config.ID_FIELD)) self.delete_by_article_ids([package.get(config.ID_FIELD)]) updates[LINKED_IN_PACKAGES] = [package for package in original.get(LINKED_IN_PACKAGES, []) if package.get(PACKAGE_TYPE) != TAKES_PACKAGE] return if self.__is_req_for_save(updates): update_state(original, updates) remove_unwanted(updates) self._add_system_updates(original, updates, user) self._add_desk_metadata(updates, original) self._handle_media_updates(updates, original, user) def _handle_media_updates(self, updates, original, user): if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE: # create crops self.cropService.create_multiple_crops(updates, original) if ASSOCIATIONS not in updates or not updates.get(ASSOCIATIONS): return # iterate over associations. Validate and process them if they are stored in database for item_name, item_obj in updates.get(ASSOCIATIONS).items(): if not (item_obj and config.ID_FIELD in item_obj): continue item_id = item_obj[config.ID_FIELD] media_item = {} if app.settings.get('COPY_METADATA_FROM_PARENT') and item_obj.get(ITEM_TYPE) in MEDIA_TYPES: stored_item = (original.get(ASSOCIATIONS) or {}).get(item_name) or item_obj else: media_item = stored_item = self.find_one(req=None, _id=item_id) if not stored_item: continue self._validate_updates(stored_item, item_obj, user) if stored_item[ITEM_TYPE] == CONTENT_TYPE.PICTURE: # create crops CropService().create_multiple_crops(item_obj, stored_item) # If the media item is not marked as 'used', mark it as used if original.get(ITEM_TYPE) == CONTENT_TYPE.TEXT and \ (item_obj is not stored_item or not stored_item.get('used')): if media_item is not stored_item: media_item = self.find_one(req=None, _id=item_id) if media_item and not media_item.get('used'): self.system_update(media_item['_id'], {'used': True}, media_item) stored_item['used'] = True stored_item.update(item_obj) updates[ASSOCIATIONS][item_name] = stored_item def on_updated(self, updates, original): get_component(ItemAutosave).clear(original['_id']) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_updated(updates, original) updated = copy(original) updated.update(updates) if config.VERSION in updates: add_activity(ACTIVITY_UPDATE, 'created new version {{ version }} for item {{ type }} about "{{ subject }}"', self.datasource, item=updated, version=updates[config.VERSION], subject=get_subject(updates, original), type=updated[ITEM_TYPE]) push_content_notification([updated, original]) get_resource_service('archive_broadcast').reset_broadcast_status(updates, original) if updates.get('profile'): get_resource_service('content_types').set_used([updates.get('profile')]) self.cropService.update_media_references(updates, original) def on_replace(self, document, original): document[ITEM_OPERATION] = ITEM_UPDATE remove_unwanted(document) user = get_user() lock_user = original.get('lock_user', None) force_unlock = document.get('force_unlock', False) user_id = str(user.get('_id')) if lock_user and str(lock_user) != user_id and not force_unlock: raise SuperdeskApiError.forbiddenError('The item was locked by another user') document['versioncreated'] = utcnow() set_item_expiry(document, original) document['version_creator'] = user_id if force_unlock: del document['force_unlock'] def on_replaced(self, document, original): 
get_component(ItemAutosave).clear(original['_id']) add_activity(ACTIVITY_UPDATE, 'replaced item {{ type }} about {{ subject }}', self.datasource, item=original, type=original['type'], subject=get_subject(original)) push_content_notification([document, original]) self.cropService.update_media_references(document, original) def on_deleted(self, doc): get_component(ItemAutosave).clear(doc['_id']) if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_deleted(doc) remove_media_files(doc) add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}', self.datasource, item=doc, type=doc[ITEM_TYPE], subject=get_subject(doc)) push_expired_notification([doc.get(config.ID_FIELD)]) app.on_archive_item_deleted(doc) def replace(self, id, document, original): return self.restore_version(id, document, original) or super().replace(id, document, original) def find_one(self, req, **lookup): item = super().find_one(req, **lookup) if item and str(item.get('task', {}).get('stage', '')) in \ get_resource_service('users').get_invisible_stages_ids(get_user().get('_id')): raise SuperdeskApiError.forbiddenError("User does not have permissions to read the item.") handle_existing_data(item) return item def restore_version(self, id, doc, original): item_id = id old_version = int(doc.get('old_version', 0)) last_version = int(doc.get('last_version', 0)) if (not all([item_id, old_version, last_version])): return None old = get_resource_service('archive_versions').find_one(req=None, _id_document=item_id, _current_version=old_version) if old is None: raise SuperdeskApiError.notFoundError('Invalid version %s' % old_version) curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id) if curr is None: raise SuperdeskApiError.notFoundError('Invalid item id %s' % item_id) if curr[config.VERSION] != last_version: raise SuperdeskApiError.preconditionFailedError('Invalid last version %s' % last_version) old['_id'] = old['_id_document'] old['_updated'] = old['versioncreated'] = utcnow() set_item_expiry(old, doc) old.pop('_id_document', None) old.pop(SIGN_OFF, None) old[ITEM_OPERATION] = ITEM_RESTORE resolve_document_version(old, SOURCE, 'PATCH', curr) remove_unwanted(old) set_sign_off(updates=old, original=curr) super().replace(id=item_id, document=old, original=curr) old.pop('old_version', None) old.pop('last_version', None) doc.update(old) return item_id def duplicate_content(self, original_doc, state=None): """ Duplicates the 'original_doc' including its version history. Copy and Duplicate actions use this method. :return: guid of the duplicated article """ if original_doc.get(ITEM_TYPE, '') == CONTENT_TYPE.COMPOSITE: for groups in original_doc.get('groups'): if groups.get('id') != 'root': associations = groups.get('refs', []) for assoc in associations: if assoc.get(RESIDREF): item, _item_id, _endpoint = self.packageService.get_associated_item(assoc) assoc[RESIDREF] = assoc['guid'] = self.duplicate_content(item) return self._duplicate_item(original_doc, state) def _duplicate_item(self, original_doc, state=None): """Duplicates an item. Duplicates the 'original_doc' including its version history. If the article being duplicated is contained in a desk then the article state is changed to Submitted.
:return: guid of the duplicated article """ new_doc = original_doc.copy() self._remove_after_copy(new_doc) on_duplicate_item(new_doc, original_doc) resolve_document_version(new_doc, SOURCE, 'PATCH', new_doc) if original_doc.get('task', {}).get('desk') is not None and new_doc.get(ITEM_STATE) != CONTENT_STATE.SUBMITTED: new_doc[ITEM_STATE] = CONTENT_STATE.SUBMITTED if state: new_doc[ITEM_STATE] = state convert_task_attributes_to_objectId(new_doc) get_model(ItemModel).create([new_doc]) self._duplicate_versions(original_doc['_id'], new_doc) self._duplicate_history(original_doc['_id'], new_doc) app.on_archive_item_updated({'duplicate_id': new_doc['guid']}, original_doc, ITEM_DUPLICATE) app.on_archive_item_updated({'duplicate_id': original_doc['_id']}, new_doc, ITEM_DUPLICATED_FROM) return new_doc['guid'] def _remove_after_copy(self, copied_item): """Removes the properties which don't make sense to have for an item after copy. """ # get the archive schema keys archive_schema_keys = list(app.config['DOMAIN'][SOURCE]['schema'].keys()) archive_schema_keys.extend([config.ID_FIELD, config.LAST_UPDATED, config.DATE_CREATED, config.VERSION, config.ETAG]) # Delete the keys that are not part of archive schema. keys_to_delete = [key for key in copied_item.keys() if key not in archive_schema_keys] keys_to_delete.extend([config.ID_FIELD, 'guid', LINKED_IN_PACKAGES, EMBARGO, PUBLISH_SCHEDULE, SCHEDULE_SETTINGS, 'lock_time', 'lock_action', 'lock_session', 'lock_user', SIGN_OFF, 'rewritten_by', 'rewrite_of', 'rewrite_sequence', 'highlights', '_type', 'event_id']) for key in keys_to_delete: copied_item.pop(key, None) task = copied_item.get('task', {}) task.pop(LAST_PRODUCTION_DESK, None) task.pop(LAST_AUTHORING_DESK, None) def _duplicate_versions(self, old_id, new_doc): """Duplicates versions for an item. Duplicates the versions of the article identified by old_id. The identifiers in each version are changed to those of new_doc. :param old_id: identifier to fetch versions :param new_doc: identifiers from this doc will be used to create versions for the duplicated item. """ resource_def = app.config['DOMAIN']['archive'] version_id = versioned_id_field(resource_def) old_versions = get_resource_service('archive_versions').get(req=None, lookup={version_id: old_id}) new_versions = [] for old_version in old_versions: old_version[version_id] = new_doc[config.ID_FIELD] del old_version[config.ID_FIELD] old_version['guid'] = new_doc['guid'] old_version['unique_name'] = new_doc['unique_name'] old_version['unique_id'] = new_doc['unique_id'] old_version['versioncreated'] = utcnow() if old_version[config.VERSION] == new_doc[config.VERSION]: old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION] new_versions.append(old_version) last_version = deepcopy(new_doc) last_version['_id_document'] = new_doc['_id'] del last_version['_id'] new_versions.append(last_version) if new_versions: get_resource_service('archive_versions').post(new_versions) def _duplicate_history(self, old_id, new_doc): """Duplicates history for an item. Duplicates the history of the article identified by old_id. The identifiers in each history entry are changed to those of new_doc. :param old_id: identifier to fetch history :param new_doc: identifiers from this doc will be used to create version history for the duplicated item.
""" resource_def = app.config['DOMAIN']['archive'] version_id = versioned_id_field(resource_def) old_history_items = get_resource_service('archive_history').get(req=None, lookup={'item_id': old_id}) new_history_items = [] for old_history_item in old_history_items: old_history_item[version_id] = new_doc[config.ID_FIELD] del old_history_item[config.ID_FIELD] old_history_item['item_id'] = new_doc['guid'] new_history_items.append(old_history_item) if new_history_items: get_resource_service('archive_history').post(new_history_items) def update(self, id, updates, original): # this needs to here as resolve_nested_documents (in eve) will add the schedule_settings if PUBLISH_SCHEDULE in updates and original[ITEM_STATE] == CONTENT_STATE.SCHEDULED: self.deschedule_item(updates, original) # this is an deschedule action return super().update(id, updates, original) def deschedule_item(self, updates, original): """Deschedule an item. This operation removed the item from publish queue and published collection. :param dict updates: updates for the document :param original: original is document. """ updates[ITEM_STATE] = CONTENT_STATE.PROGRESS updates[PUBLISH_SCHEDULE] = original[PUBLISH_SCHEDULE] updates[SCHEDULE_SETTINGS] = original[SCHEDULE_SETTINGS] updates[ITEM_OPERATION] = ITEM_DESCHEDULE # delete entry from published repo get_resource_service('published').delete_by_article_id(original['_id']) def can_edit(self, item, user_id): """ Determines if the user can edit the item or not. """ # TODO: modify this function when read only permissions for stages are implemented # TODO: and Content state related checking. if not current_user_has_privilege('archive'): return False, 'User does not have sufficient permissions.' item_location = item.get('task') if item_location: if item_location.get('desk'): if not superdesk.get_resource_service('user_desks').is_member(user_id, item_location.get('desk')): return False, 'User is not a member of the desk.' elif item_location.get('user'): if not str(item_location.get('user')) == str(user_id): return False, 'Item belongs to another user.' return True, '' def delete_by_article_ids(self, ids): """Remove the content :param list ids: list of ids to be removed """ version_field = versioned_id_field(app.config['DOMAIN']['archive_versions']) get_resource_service('archive_versions').delete_action(lookup={version_field: {'$in': ids}}) super().delete_action({config.ID_FIELD: {'$in': ids}}) def __is_req_for_save(self, doc): """Checks if doc contains req_for_save key. Patch of /api/archive is being used in multiple places. This method differentiates from the patch triggered by user or not. :param dictionary doc: doc to test """ if 'req_for_save' in doc: req_for_save = doc['req_for_save'] del doc['req_for_save'] return req_for_save == 'true' return True def validate_embargo(self, item): """Validates the embargo of the item. Following are checked: 1. Item can't be a package or a take or a re-write of another story 2. Publish Schedule and Embargo are mutually exclusive 3. Always a future date except in case of Corrected and Killed. 
:raises: SuperdeskApiError.badRequestError() if the validation fails """ if item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE: if EMBARGO in item: embargo = item.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(EMBARGO)) if embargo: if item.get(PUBLISH_SCHEDULE) or item[ITEM_STATE] == CONTENT_STATE.SCHEDULED: raise SuperdeskApiError.badRequestError("An item can't have both Publish Schedule and Embargo") if (item[ITEM_STATE] not in {CONTENT_STATE.KILLED, CONTENT_STATE.SCHEDULED}) \ and embargo <= utcnow(): raise SuperdeskApiError.badRequestError("Embargo cannot be earlier than now") package = TakesPackageService().get_take_package(item) if package and package.get(SEQUENCE, 1) > 1: raise SuperdeskApiError.badRequestError("Takes doesn't support Embargo") if item.get('rewrite_of'): raise SuperdeskApiError.badRequestError("Rewrites doesn't support Embargo") if not isinstance(embargo, datetime.date) or not embargo.time(): raise SuperdeskApiError.badRequestError("Invalid Embargo") elif is_normal_package(item): if item.get(EMBARGO): raise SuperdeskApiError.badRequestError("A Package doesn't support Embargo") self.packageService.check_if_any_item_in_package_has_embargo(item) def _validate_updates(self, original, updates, user): """Validates updates to the article for the below conditions. If any of these conditions are met then an exception is raised: 1. Is the article locked by a user other than the one requesting the update? 2. Is the state of the article Killed? 3. Is the user trying to update the package with Public Service Announcements? 4. Is the user authorized to update the unique name of the article? 5. Is the user trying to update the genre of a broadcast article? 6. Is the article being scheduled while it is in a package? 7. Is the article being scheduled with an invalid schedule timestamp? 8. Does the article have valid crops if it is a picture? 9. Is the article a valid package if its type is a package? 10. Does the article have a valid Embargo? 11. Make sure that there are no duplicate anpa_category codes in the article. 12. Make sure there are no duplicate subjects in the update. :raises: SuperdeskApiError.forbiddenError() - if state of the article is killed or user is not authorized to update unique name or if article is locked by another user SuperdeskApiError.badRequestError() - if Public Service Announcements are being added to a package or genre is being updated for a broadcast, is invalid for scheduling, the updates contain duplicate anpa_category or subject codes """ updated = original.copy() updated.update(updates) lock_user = original.get('lock_user', None) force_unlock = updates.get('force_unlock', False) str_user_id = str(user.get(config.ID_FIELD)) if user else None if lock_user and str(lock_user) != str_user_id and not force_unlock: raise SuperdeskApiError.forbiddenError('The item was locked by another user') if original.get(ITEM_STATE) == CONTENT_STATE.KILLED: raise SuperdeskApiError.forbiddenError("Item isn't in a valid state to be updated.") if updates.get('body_footer') and is_normal_package(original): raise SuperdeskApiError.badRequestError("Package doesn't support Public Service Announcements") if 'unique_name' in updates and not is_admin(user) \ and (user['active_privileges'].get('metadata_uniquename', 0) == 0): raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name") # if broadcast then update to genre is not allowed.
if original.get('broadcast') and updates.get('genre') and \ any(genre.get('qcode', '').lower() != BROADCAST_GENRE.lower() for genre in updates.get('genre')): raise SuperdeskApiError.badRequestError('Cannot change the genre for broadcast content.') if PUBLISH_SCHEDULE in updates or "schedule_settings" in updates: if is_item_in_package(original): raise SuperdeskApiError.badRequestError( 'This item is in a package and it needs to be removed before the item can be scheduled!') package = TakesPackageService().get_take_package(original) or {} update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE)) if updates.get(PUBLISH_SCHEDULE): validate_schedule(updated.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(PUBLISH_SCHEDULE)), package.get(SEQUENCE, 1)) updates[SCHEDULE_SETTINGS] = updated.get(SCHEDULE_SETTINGS, {}) if original[ITEM_TYPE] == CONTENT_TYPE.PICTURE: CropService().validate_multiple_crops(updates, original) elif original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_update(updates, original) # update the embargo date update_schedule_settings(updated, EMBARGO, updated.get(EMBARGO)) # Do the validation after Circular Reference check passes in Package Service self.validate_embargo(updated) if EMBARGO in updates or "schedule_settings" in updates: updates[SCHEDULE_SETTINGS] = updated.get(SCHEDULE_SETTINGS, {}) # Ensure that there are no duplicate categories in the update category_qcodes = [q['qcode'] for q in updates.get('anpa_category', []) or []] if category_qcodes and len(category_qcodes) != len(set(category_qcodes)): raise SuperdeskApiError.badRequestError("Duplicate category codes are not allowed") # Ensure that there are no duplicate subjects in the update subject_qcodes = [q['qcode'] for q in updates.get('subject', []) or []] if subject_qcodes and len(subject_qcodes) != len(set(subject_qcodes)): raise SuperdeskApiError.badRequestError("Duplicate subjects are not allowed") def _add_system_updates(self, original, updates, user): """Adds system updates to item. As the name suggests, this method adds properties which are derived based on updates sent in the request. 1. Sets item operation, version created, version creator, sign off and word count. 2. Resets Item Expiry """ convert_task_attributes_to_objectId(updates) updates[ITEM_OPERATION] = ITEM_UPDATE updates.setdefault('original_creator', original.get('original_creator')) updates['versioncreated'] = utcnow() updates['version_creator'] = str(user.get(config.ID_FIELD)) if user else None update_word_count(updates, original) update_version(updates, original) set_item_expiry(updates, original) set_sign_off(updates, original=original) set_dateline(updates, original) # Clear publish_schedule field if updates.get(PUBLISH_SCHEDULE) \ and datetime.datetime.fromtimestamp(0).date() == updates.get(PUBLISH_SCHEDULE).date(): updates[PUBLISH_SCHEDULE] = None updates[SCHEDULE_SETTINGS] = {} if updates.get('force_unlock', False): del updates['force_unlock'] def get_expired_items(self, expiry_datetime, invalid_only=False): """Get the expired items. Returns items whose content state is not scheduled and which match the given parameters. :param datetime expiry_datetime: expiry datetime :param bool invalid_only: if True, return only invalid items :return pymongo.cursor: expired non-published items.
""" unique_id = 0 while True: req = ParsedRequest() req.sort = 'unique_id' query = { '$and': [ {'expiry': {'$lte': date_to_str(expiry_datetime)}}, {'$or': [ {'task.desk': {'$ne': None}}, {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None} ]} ] } query['$and'].append({'unique_id': {'$gt': unique_id}}) if invalid_only: query['$and'].append({'expiry_status': 'invalid'}) else: query['$and'].append({'expiry_status': {'$ne': 'invalid'}}) req.where = json.dumps(query) req.max_results = config.MAX_EXPIRY_QUERY_LIMIT items = list(self.get_from_mongo(req=req, lookup=None)) if not len(items): break unique_id = items[-1]['unique_id'] yield items def _add_desk_metadata(self, updates, original): """Populate updates metadata from item desk in case it's set. It will only add data which is not set yet on the item. :param updates: updates to item that should be saved :param original: original item version before update """ return get_resource_service('desks').apply_desk_metadata(updates, original)
def on_update(self, updates, original): updates[ITEM_OPERATION] = ITEM_SPIKE takes_service = TakesPackageService() if not takes_service.can_spike_takes_package_item(original): raise SuperdeskApiError.badRequestError(message="Only last take of the package can be spiked.")
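# Illustrative note (the helper below is hypothetical): the guard above
# enforces the last-take rule, e.g. in a takes package T1 -> T2 -> T3 only
# T3 may be spiked; attempting to spike T1 or T2 raises badRequestError.
def may_spike(item):
    # same service call used by on_update() above, exposed as a probe
    return TakesPackageService().can_spike_takes_package_item(item)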
def on_update(self, updates, original): updates[ITEM_OPERATION] = ITEM_UPDATE is_update_allowed(original) user = get_user() if 'publish_schedule' in updates and original['state'] == 'scheduled': # this is a deschedule action self.deschedule_item(updates, original) # check if there is a takes package and deschedule the takes package. package = TakesPackageService().get_take_package(original) if package and package.get('state') == 'scheduled': package_updates = {'publish_schedule': None, 'groups': package.get('groups')} self.patch(package.get(config.ID_FIELD), package_updates) return if updates.get('publish_schedule'): if datetime.datetime.fromtimestamp(0).date() == updates.get('publish_schedule').date(): # publish_schedule field will be cleared updates['publish_schedule'] = None else: # validate the schedule if is_item_in_package(original): raise SuperdeskApiError.badRequestError( message='This item is in a package and it needs to be removed before the item can be scheduled!') package = TakesPackageService().get_take_package(original) or {} validate_schedule(updates.get('publish_schedule'), package.get(SEQUENCE, 1)) if 'unique_name' in updates and not is_admin(user) \ and (user['active_privileges'].get('metadata_uniquename', 0) == 0): raise SuperdeskApiError.forbiddenError("Unauthorized to modify Unique Name") remove_unwanted(updates) if self.__is_req_for_save(updates): update_state(original, updates) lock_user = original.get('lock_user', None) force_unlock = updates.get('force_unlock', False) updates.setdefault('original_creator', original.get('original_creator')) str_user_id = str(user.get('_id')) if user else None if lock_user and str(lock_user) != str_user_id and not force_unlock: raise SuperdeskApiError.forbiddenError('The item was locked by another user') updates['versioncreated'] = utcnow() set_item_expiry(updates, original) updates['version_creator'] = str_user_id set_sign_off(updates, original=original) update_word_count(updates) if force_unlock: del updates['force_unlock'] # create crops crop_service = ArchiveCropService() crop_service.validate_multiple_crops(updates, original) crop_service.create_multiple_crops(updates, original) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self.packageService.on_update(updates, original) update_version(updates, original) # Do the validation after Circular Reference check passes in Package Service updated = original.copy() updated.update(updates) self.validate_embargo(updated)
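# Illustrative note on the deschedule convention above (assumption: clients
# send the Unix epoch date, rather than null, to clear a schedule):
def wants_schedule_cleared(updates):
    # mirrors the epoch-date check in on_update(); hypothetical helper
    schedule = updates.get('publish_schedule')
    return bool(schedule) and schedule.date() == datetime.datetime.fromtimestamp(0).date()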
def is_last_take(self, article): article[config.ID_FIELD] = article.get('item_id', article.get(config.ID_FIELD)) return TakesPackageService().is_last_takes_package_item(article)
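# Illustrative data for is_last_take() (hypothetical values): published docs
# keep the archive id in 'item_id' while '_id' is the published-doc id, so
# the method rewrites '_id' before asking the takes package service.
published_doc = {'_id': 'pub-0001', 'item_id': 'urn:archive:take-3'}
# inside is_last_take(), published_doc[config.ID_FIELD] becomes
# 'urn:archive:take-3', i.e. the archive identity of the take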
def _validate_take(self, original): takes_service = TakesPackageService() if not takes_service.is_last_takes_package_item(original): raise SuperdeskApiError.badRequestError(message="Only last take of the package can be spiked.")
class BasePublishService(BaseService): """Base service for different "publish" services.""" publish_type = 'publish' published_state = 'published' non_digital = partial(filter, lambda s: s.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE) digital = partial(filter, lambda s: (s.get('subscriber_type', '') in {SUBSCRIBER_TYPES.DIGITAL, SUBSCRIBER_TYPES.ALL})) takes_package_service = TakesPackageService() package_service = PackageService() def on_update(self, updates, original): self._refresh_associated_items(original) self._validate(original, updates) self._set_updates(original, updates, updates.get(config.LAST_UPDATED, utcnow())) convert_task_attributes_to_objectId(updates) self._process_publish_updates(original, updates) self._mark_media_item_as_used(updates, original) def on_updated(self, updates, original): original = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD]) updates.update(original) if updates[ITEM_OPERATION] != ITEM_KILL and \ original.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]: get_resource_service('archive_broadcast').on_broadcast_master_updated(updates[ITEM_OPERATION], original) get_resource_service('archive_broadcast').reset_broadcast_status(updates, original) push_content_notification([updates]) self._import_into_legal_archive(updates) CropService().update_media_references(updates, original, True) superdesk.item_published.send(self, item=original) def update(self, id, updates, original): """Handles the workflow for each of Publish, Correct and Kill.""" try: user = get_user() auto_publish = updates.get('auto_publish', False) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self._publish_package_items(original, updates) self._update_archive(original, updates, should_insert_into_versions=auto_publish) else: self._refresh_associated_items(original) updated = deepcopy(original) updated.update(deepcopy(updates)) if updates.get(ASSOCIATIONS): self._refresh_associated_items(updated) # updates got lost with update # process the takes package for published or corrected items; even when # NO_TAKES is enabled, an existing takes package must still be processed. 
if self.published_state != CONTENT_STATE.KILLED and \ (not app.config.get('NO_TAKES', False) or self.takes_package_service.get_take_package_id(updated)): self._process_takes_package(original, updated, updates) self._update_archive(original, updates, should_insert_into_versions=auto_publish) self.update_published_collection(published_item_id=original[config.ID_FIELD], updated=updated) from apps.publish.enqueue import enqueue_published enqueue_published.apply_async() push_notification('item:publish', item=str(id), unique_name=original['unique_name'], desk=str(original.get('task', {}).get('desk', '')), user=str(user.get(config.ID_FIELD, ''))) except SuperdeskApiError: raise except KeyError as e: logger.exception(e) raise SuperdeskApiError.badRequestError( message="Key is missing on article to be published: {}".format(str(e)) ) except Exception as e: raise SuperdeskApiError.internalError(message="Failed to publish the item: {}".format(str(id)), exception=e) def _process_takes_package(self, original, updated, updates): if original[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED} \ and not is_genre(original, BROADCAST_GENRE): # check if item is in a digital package last_updated = updates.get(config.LAST_UPDATED, utcnow()) package = self.takes_package_service.get_take_package(original) if not package: ''' If the type of the item is text or preformatted then the item needs to be sent to digital subscribers, so package the item as a take. ''' package_id = self.takes_package_service.package_story_as_a_take(updated, {}, None) package = get_resource_service(ARCHIVE).find_one(req=None, _id=package_id) updates[LINKED_IN_PACKAGES] = updated[LINKED_IN_PACKAGES] package_id = package[config.ID_FIELD] package_updates = self.process_takes(updates_of_take_to_be_published=updates, original_of_take_to_be_published=original, package=package) # If the original package is corrected then the next take shouldn't change it # back to 'published' preserve_state = package.get(ITEM_STATE, '') == CONTENT_STATE.CORRECTED and \ updates.get(ITEM_OPERATION, ITEM_PUBLISH) == ITEM_PUBLISH self._set_updates(package, package_updates, last_updated, preserve_state) package_updates.setdefault(ITEM_OPERATION, updates.get(ITEM_OPERATION, ITEM_PUBLISH)) if self.published_state == CONTENT_STATE.KILLED: package_copy = deepcopy(package) package_copy.update(package_updates) self.apply_kill_override(package_copy, package_updates) self._update_archive(package, package_updates) package.update(package_updates) self.update_published_collection(published_item_id=package_id) self._import_into_legal_archive(package) def is_targeted(self, article, target=None): """Checks if the article is targeted. Returns True if the given article has been targeted by region, subscriber type or specific subscribers. 
:param article: Article to check :param target: Optional specific target to check if exists :return: """ if target: return len(article.get(target, [])) > 0 else: return len(article.get('target_regions', []) + article.get('target_types', []) + article.get('target_subscribers', [])) > 0 def _validate(self, original, updates): self.raise_if_invalid_state_transition(original) updated = original.copy() updated.update(updates) self.raise_if_not_marked_for_publication(updated) takes_package = self.takes_package_service.get_take_package(original) if self.publish_type == 'publish': # validate if the take can be published if takes_package and not self.takes_package_service.can_publish_take( takes_package, updates.get(SEQUENCE, original.get(SEQUENCE, 1))): raise PublishQueueError.previous_take_not_published_error( Exception("Previous takes are not published.")) update_schedule_settings(updated, PUBLISH_SCHEDULE, updated.get(PUBLISH_SCHEDULE)) validate_schedule(updated.get(SCHEDULE_SETTINGS, {}).get('utc_{}'.format(PUBLISH_SCHEDULE)), takes_package.get(SEQUENCE, 1) if takes_package else 1) if original[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and updates.get(EMBARGO): update_schedule_settings(updated, EMBARGO, updated.get(EMBARGO)) get_resource_service(ARCHIVE).validate_embargo(updated) if self.publish_type in [ITEM_CORRECT, ITEM_KILL]: if updates.get(EMBARGO) and not original.get(EMBARGO): raise SuperdeskApiError.badRequestError("Embargo can't be set after publishing") if self.publish_type in [ITEM_CORRECT, ITEM_KILL]: if updates.get('dateline'): raise SuperdeskApiError.badRequestError("Dateline can't be modified after publishing") if self.publish_type == ITEM_PUBLISH and updated.get('rewritten_by'): # if the rewrite is already published then this story can no longer be published rewritten_by = get_resource_service(ARCHIVE).find_one(req=None, _id=updated.get('rewritten_by')) if rewritten_by and rewritten_by.get(ITEM_STATE) in PUBLISH_STATES: raise SuperdeskApiError.badRequestError("Cannot publish the story after the update is published!") publish_type = 'auto_publish' if updates.get('auto_publish') else self.publish_type validate_item = {'act': publish_type, 'type': original['type'], 'validate': updated} validation_errors = get_resource_service('validate').post([validate_item]) if validation_errors[0]: raise ValidationError(validation_errors) validation_errors = [] self._validate_associated_items(original, takes_package, validation_errors) if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: self._validate_package(original, updates, validation_errors) if len(validation_errors) > 0: raise ValidationError(validation_errors) def _validate_package(self, package, updates, validation_errors): # make sure the package is not scheduled or spiked if package[ITEM_STATE] in (CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED): validation_errors.append('Package cannot be {}'.format(package[ITEM_STATE])) if package.get(EMBARGO): validation_errors.append('Package cannot have Embargo') items = self.package_service.get_residrefs(package) if self.publish_type in [ITEM_CORRECT, ITEM_KILL]: removed_items, added_items = self._get_changed_items(items, updates) # we raise an error if a correction is done on an empty package; kill is fine. 
if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT: validation_errors.append("Corrected package cannot be empty!") def raise_if_not_marked_for_publication(self, original): if original.get('flags', {}).get('marked_for_not_publication', False): raise SuperdeskApiError.badRequestError('Cannot publish an item which is marked as Not for Publication') def raise_if_invalid_state_transition(self, original): if not is_workflow_state_transition_valid(self.publish_type, original[ITEM_STATE]): error_message = "Can't {} as item state is {}" if original[ITEM_TYPE] == CONTENT_TYPE.TEXT else \ "Can't {} as either package state or one of the items state is {}" raise InvalidStateTransitionError(error_message.format(self.publish_type, original[ITEM_STATE])) def get_digital_id_for_package_item(self, package_item): """Finds the digital item id for a given item in a package. :param package_item: item in a package :return string: Digital item id if there's one, otherwise id of package_item """ if package_item[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]: return package_item[config.ID_FIELD] else: package_item_takes_package_id = self.takes_package_service.get_take_package_id(package_item) if not package_item_takes_package_id: return package_item[config.ID_FIELD] return package_item_takes_package_id def _process_publish_updates(self, original, updates): """Common updates for published items.""" desk = None if original.get('task', {}).get('desk'): desk = get_resource_service('desks').find_one(req=None, _id=original['task']['desk']) if not original.get('ingest_provider'): updates['source'] = desk['source'] if desk and desk.get('source', '') \ else app.settings['DEFAULT_SOURCE_VALUE_FOR_MANUAL_ARTICLES'] updates['pubstatus'] = PUB_STATUS.CANCELED if self.publish_type == 'kill' else PUB_STATUS.USABLE self._set_item_expiry(updates, original) def _set_item_expiry(self, updates, original): """Set the expiry for the item. :param dict updates: doc on which the publishing action is performed :param dict original: original doc """ desk_id = original.get('task', {}).get('desk') stage_id = original.get('task', {}).get('stage') offset = None if EMBARGO in updates or PUBLISH_SCHEDULE in updates: offset = get_utc_schedule(updates, PUBLISH_SCHEDULE) or get_utc_schedule(updates, EMBARGO) elif EMBARGO in original or PUBLISH_SCHEDULE in original: offset = get_utc_schedule(original, PUBLISH_SCHEDULE) or get_utc_schedule(original, EMBARGO) if app.settings.get('PUBLISHED_CONTENT_EXPIRY_MINUTES'): updates['expiry'] = get_expiry_date(app.settings['PUBLISHED_CONTENT_EXPIRY_MINUTES'], offset=offset) else: updates['expiry'] = get_expiry(desk_id, stage_id, offset=offset) def _is_take_item(self, item): """Returns True if the item is a take.""" return item[ITEM_TYPE] != CONTENT_TYPE.COMPOSITE and \ (not (self.is_targeted(item) or is_genre(item, BROADCAST_GENRE))) def process_takes(self, updates_of_take_to_be_published, package, original_of_take_to_be_published=None): """Process takes for publishing. The primary rule for publishing a take in a takes package is: all previous takes must be published before a take can be published. Also generates the body_html of the takes package and makes sure the metadata for the package is the same as the metadata of the take to be published. 
:param dict updates_of_take_to_be_published: updates for the take to be published :param dict package: Takes package to publish :param dict original_of_take_to_be_published: original of the take to be published :return: Takes Package Updates """ takes = self.takes_package_service.get_published_takes(package) body_html = updates_of_take_to_be_published.get('body_html', original_of_take_to_be_published.get('body_html', '')) package_updates = {} groups = package.get(GROUPS, []) if groups: take_refs = [ref for group in groups if group['id'] == 'main' for ref in group.get('refs')] sequence_num_of_take_to_be_published = 0 take_article_id = updates_of_take_to_be_published.get( config.ID_FIELD, original_of_take_to_be_published[config.ID_FIELD]) for r in take_refs: if r[GUID_FIELD] == take_article_id: sequence_num_of_take_to_be_published = r[SEQUENCE] r['is_published'] = True break if takes and self.published_state != 'killed': body_html_list = [take.get('body_html', '') for take in takes] if self.published_state == CONTENT_STATE.PUBLISHED: body_html_list.append(body_html) else: body_html_list[sequence_num_of_take_to_be_published - 1] = body_html package_updates['body_html'] = '<br>'.join(body_html_list) else: package_updates['body_html'] = body_html metadata_tobe_copied = self.takes_package_service.fields_for_creating_take.copy() metadata_tobe_copied.extend([PUBLISH_SCHEDULE, SCHEDULE_SETTINGS, 'byline', EMBARGO]) if 'auto_publish' in updates_of_take_to_be_published: metadata_tobe_copied.extend(['auto_publish']) updated_take = original_of_take_to_be_published.copy() updated_take.update(updates_of_take_to_be_published) metadata_from = updated_take # only copy the abstract from the take when it is non-empty and differs from the package abstract if metadata_from.get('abstract', '') != '' and \ metadata_from.get('abstract') != package.get('abstract'): metadata_tobe_copied.append('abstract') # this rule has changed to use the last published metadata # per ticket SD-3885 # if self.published_state == 'corrected' and len(takes) > 1: # # get the last take metadata only if there are more than one takes # metadata_from = takes[-1] for metadata in metadata_tobe_copied: if metadata in metadata_from: package_updates[metadata] = metadata_from.get(metadata) # rewire the takes package to the takes package of the 'rewrite_of' item if sequence_num_of_take_to_be_published == 1 and \ original_of_take_to_be_published.get('rewrite_of'): rewrite_of = self.find_one(req=None, _id=original_of_take_to_be_published.get('rewrite_of')) if rewrite_of: rewrite_package = self.takes_package_service.get_take_package(rewrite_of) if rewrite_package: package_updates['rewrite_of'] = rewrite_package.get(config.ID_FIELD) if self.published_state == CONTENT_STATE.KILLED: # on kill, refresh the other take refs in the package # with the correct version, headline and slugline package_updates[ASSOCIATIONS] = None archive_service = get_resource_service(ARCHIVE) for ref in take_refs: if ref.get(RESIDREF) != take_article_id: archive_item = archive_service.find_one(req=None, _id=ref.get(RESIDREF)) ref['headline'] = archive_item.get('headline') ref['slugline'] = archive_item.get('slugline') ref[config.VERSION] = archive_item.get(config.VERSION) else: # update associations for the take. 
self.takes_package_service.update_associations(package_updates, package, metadata_from) take_ref = next((ref for ref in take_refs if ref.get(RESIDREF) == take_article_id), None) if take_ref: # for the published take update the version, headline and slugline take_ref['headline'] = updated_take.get('headline') take_ref['slugline'] = updated_take.get('slugline') take_ref[config.VERSION] = updated_take.get(config.VERSION) package_updates[GROUPS] = groups self._set_item_expiry(package_updates, package) return package_updates def _publish_package_items(self, package, updates): """Publishes all items of a package recursively, then publishes the package itself. :param package: package to publish :param updates: payload """ items = self.package_service.get_residrefs(package) if len(items) == 0 and self.publish_type == ITEM_PUBLISH: raise SuperdeskApiError.badRequestError("Empty package cannot be published!") removed_items = [] if self.publish_type in [ITEM_CORRECT, ITEM_KILL]: removed_items, added_items = self._get_changed_items(items, updates) # we raise an error if a correction is done on an empty package; kill is fine. if len(removed_items) == len(items) and len(added_items) == 0 and self.publish_type == ITEM_CORRECT: raise SuperdeskApiError.badRequestError("Corrected package cannot be empty!") items.extend(added_items) if not updates.get('groups') and package.get('groups'): # this saves some typing in tests updates['groups'] = package.get('groups') if items: archive_publish = get_resource_service('archive_publish') for guid in items: package_item = super().find_one(req=None, _id=guid) if not package_item: raise SuperdeskApiError.badRequestError( "Package item with id: {} does not exist.".format(guid)) if package_item[ITEM_STATE] not in PUBLISH_STATES: # if the item is not published then publish it if package_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: # if the item is a package, recurse to publish it sub_updates = {i: updates[i] for i in ['state', 'operation'] if i in updates} sub_updates['groups'] = list(package_item['groups']) self._publish_package_items(package_item, sub_updates) self._update_archive(original=package_item, updates=sub_updates, should_insert_into_versions=False) else: # publish the item package_item[PUBLISHED_IN_PACKAGE] = package[config.ID_FIELD] archive_publish.patch(id=package_item.pop(config.ID_FIELD), updates=package_item) insert_into_versions(id_=guid) elif guid in removed_items: # remove the package information from the package item. linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES) if linked.get(PACKAGE) != package.get(config.ID_FIELD)] super().system_update(guid, {LINKED_IN_PACKAGES: linked_in_packages}, package_item) package_item = super().find_one(req=None, _id=guid) self.package_service.update_field_in_package(updates, package_item[config.ID_FIELD], config.VERSION, package_item[config.VERSION]) if package_item.get(ASSOCIATIONS): self.package_service.update_field_in_package( updates, package_item[config.ID_FIELD], ASSOCIATIONS, package_item[ASSOCIATIONS] ) updated = deepcopy(package) updated.update(updates) self.update_published_collection(published_item_id=package[config.ID_FIELD], updated=updated) def update_published_collection(self, published_item_id, updated=None): """Updates the published collection with the published item. Sets last_published_version to False for previous versions of the published item. :param str published_item_id: _id of the document. 
""" published_item = super().find_one(req=None, _id=published_item_id) published_item = copy(published_item) if updated: published_item.update(updated) published_item['is_take_item'] = self.takes_package_service.get_take_package_id(published_item) is not None if not published_item.get('digital_item_id'): published_item['digital_item_id'] = self.get_digital_id_for_package_item(published_item) get_resource_service(PUBLISHED).update_published_items(published_item_id, LAST_PUBLISHED_VERSION, False) return get_resource_service(PUBLISHED).post([published_item]) def set_state(self, original, updates): """Set the state of the document based on the action (publish, correction, kill) :param dict original: original document :param dict updates: updates related to document """ updates[PUBLISH_SCHEDULE] = None updates[SCHEDULE_SETTINGS] = {} updates[ITEM_STATE] = self.published_state def _set_updates(self, original, updates, last_updated, preserve_state=False): """Sets config.VERSION, config.LAST_UPDATED, ITEM_STATE in updates document. If item is being published and embargo is available then append Editorial Note with 'Embargoed'. :param dict original: original document :param dict updates: updates related to the original document :param datetime last_updated: datetime of the updates. """ if not preserve_state: self.set_state(original, updates) updates.setdefault(config.LAST_UPDATED, last_updated) if original[config.VERSION] == updates.get(config.VERSION, original[config.VERSION]): resolve_document_version(document=updates, resource=ARCHIVE, method='PATCH', latest_doc=original) user = get_user() if user and user.get(config.ID_FIELD): updates['version_creator'] = user[config.ID_FIELD] def _update_archive(self, original, updates, versioned_doc=None, should_insert_into_versions=True): """Updates the articles into archive collection and inserts the latest into archive_versions. Also clears autosaved versions if any. :param: versioned_doc: doc which can be inserted into archive_versions :param: should_insert_into_versions if True inserts the latest document into versions collection """ self.backend.update(self.datasource, original[config.ID_FIELD], updates, original) app.on_archive_item_updated(updates, original, updates[ITEM_OPERATION]) if should_insert_into_versions: if versioned_doc is None: insert_into_versions(id_=original[config.ID_FIELD]) else: insert_into_versions(doc=versioned_doc) get_component(ItemAutosave).clear(original[config.ID_FIELD]) def _get_changed_items(self, existing_items, updates): """Returns the added and removed items from existing_items. :param existing_items: Existing list :param updates: Changes :return: list of removed items and list of added items """ if 'groups' in updates: new_items = self.package_service.get_residrefs(updates) removed_items = list(set(existing_items) - set(new_items)) added_items = list(set(new_items) - set(existing_items)) return removed_items, added_items else: return [], [] def _validate_associated_items(self, original_item, takes_package, validation_errors=[]): """Validates associated items. This function will ensure that the unpublished content validates and none of the content is locked by other than the publishing session, also do not allow any killed or spiked content. :param package: :param takes_package: :param validation_errors: validation errors are appended if there are any. 
""" items = [value for value in (original_item.get(ASSOCIATIONS) or {}).values()] if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \ not takes_package and self.publish_type == ITEM_PUBLISH: items.extend(self.package_service.get_residrefs(original_item)) for item in items: if type(item) == dict: doc = item elif item: doc = super().find_one(req=None, _id=item) else: continue if not doc: continue if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE: digital = self.takes_package_service.get_take_package(doc) or {} self._validate_associated_items(doc, digital, validation_errors) # make sure no items are killed or spiked or scheduled doc_item_state = doc.get(ITEM_STATE, CONTENT_STATE.PUBLISHED) if doc_item_state in (CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED): validation_errors.append('Item cannot contain associated {} item'.format(doc[ITEM_STATE])) if doc.get(EMBARGO): validation_errors.append('Item cannot have associated items with Embargo') # don't validate items that already have published if doc_item_state not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]: validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc} if type(item) == dict: validate_item['embedded'] = True errors = get_resource_service('validate').post([validate_item], headline=True) if errors[0]: pre_errors = ['Associated item %s %s' % (doc.get('slugline', ''), error) for error in errors[0]] validation_errors.extend(pre_errors) # check the locks on the items if doc.get('lock_session', None) and original_item['lock_session'] != doc['lock_session']: validation_errors.extend(['{}: packaged item cannot be locked'.format(doc['headline'])]) def _import_into_legal_archive(self, doc): """Import into legal archive async :param {dict} doc: document to be imported """ if doc.get(ITEM_STATE) != CONTENT_STATE.SCHEDULED: kwargs = { 'item_id': doc.get(config.ID_FIELD) } # countdown=3 is for elasticsearch to be refreshed with archive and published changes import_into_legal_archive.apply_async(countdown=3, kwargs=kwargs) # @UndefinedVariable def _apply_kill_template(self, item): # apply the kill template updates = render_content_template_by_name(item, 'kill') return updates def apply_kill_override(self, item, updates): """Applies kill override. Kill requires content to be generate based on the item getting killed (and not the item that is being actioned on). :param dict item: item to kill :param dict updates: updates that needs to be modified based on the template :return: """ try: desk_name = get_resource_service('desks').get_desk_name(item.get('task', {}).get('desk')) city = get_dateline_city(item.get('dateline')) kill_header = json.loads(render_template('article_killed_override.json', slugline=item.get('slugline', ''), headline=item.get('headline', ''), desk_name=desk_name, city=city, versioncreated=item.get('versioncreated', item.get(config.LAST_UPDATED)), body_html=updates.get('body_html', ''), update_headline=updates.get('headline', '')), strict=False) for key, value in kill_header.items(): kill_header[key] = html.unescape(value) updates.update(kill_header) except: logger.exception('Failed to apply kill header template to item {}.'.format(item)) def _refresh_associated_items(self, original): """Refresh associated items before publishing Any further updates made to basic metadata done after item was associated will be carried on and used when validating those items. 
""" associations = original.get(ASSOCIATIONS) or {} for _, item in associations.items(): if type(item) == dict and item.get(config.ID_FIELD): keys = DEFAULT_SCHEMA.keys() if app.settings.get('COPY_METADATA_FROM_PARENT') and item.get(ITEM_TYPE) in MEDIA_TYPES: updates = original keys = FIELDS_TO_COPY_FOR_ASSOCIATED_ITEM else: updates = super().find_one(req=None, _id=item[config.ID_FIELD]) or {} update_item_data(item, updates, keys) def _mark_media_item_as_used(self, updates, original): if ASSOCIATIONS not in updates or not updates.get(ASSOCIATIONS): return for item_name, item_obj in updates.get(ASSOCIATIONS).items(): if not (item_obj and config.ID_FIELD in item_obj): continue item_id = item_obj[config.ID_FIELD] media_item = {} if app.settings.get('COPY_METADATA_FROM_PARENT') and item_obj.get(ITEM_TYPE) in MEDIA_TYPES: stored_item = (original.get(ASSOCIATIONS) or {}).get(item_name) or item_obj else: media_item = stored_item = self.find_one(req=None, _id=item_id) if not stored_item: continue # If the media item is not marked as 'used', mark it as used if original.get(ITEM_TYPE) == CONTENT_TYPE.TEXT and \ (item_obj is not stored_item or not stored_item.get('used')): archive_service = get_resource_service('archive') if media_item is not stored_item: media_item = archive_service.find_one(req=None, _id=item_id) if media_item and not media_item.get('used'): archive_service.system_update(media_item['_id'], {'used': True}, media_item) stored_item['used'] = True
class ArchiveBroadcastService(BaseService): takesService = TakesPackageService() packageService = PackageService() def create(self, docs): service = get_resource_service(SOURCE) item_id = request.view_args['item_id'] item = service.find_one(req=None, _id=item_id) doc = docs[0] self._valid_broadcast_item(item) desk_id = doc.get('desk') desk = None if desk_id: desk = get_resource_service('desks').find_one(req=None, _id=desk_id) doc.pop('desk', None) doc['task'] = {} if desk: doc['task']['desk'] = desk.get(config.ID_FIELD) doc['task']['stage'] = desk.get('working_stage') doc['task']['user'] = get_user().get('_id') genre_list = get_resource_service('vocabularies').find_one(req=None, _id='genre') or {} broadcast_genre = [{'qcode': genre.get('qcode'), 'name': genre.get('name')} for genre in genre_list.get('items', []) if genre.get('qcode') == BROADCAST_GENRE and genre.get('is_active')] if not broadcast_genre: raise SuperdeskApiError.badRequestError(message="Cannot find the {} genre.".format(BROADCAST_GENRE)) doc['broadcast'] = { 'status': '', 'master_id': item_id, 'takes_package_id': self.takesService.get_take_package_id(item), 'rewrite_id': item.get('rewritten_by') } doc['genre'] = broadcast_genre doc['family_id'] = item.get('family_id') for key in FIELDS_TO_COPY: doc[key] = item.get(key) resolve_document_version(document=doc, resource=SOURCE, method='POST') service.post(docs) insert_into_versions(id_=doc[config.ID_FIELD]) build_custom_hateoas(CUSTOM_HATEOAS, doc) return [doc[config.ID_FIELD]] def _valid_broadcast_item(self, item): """Validates item for broadcast. Broadcast item can only be created for Text or Pre-formatted item. Item state needs to be Published or Corrected :param dict item: Item from which the broadcast item will be created """ if not item: raise SuperdeskApiError.notFoundError( message="Cannot find the requested item id.") if not item.get(ITEM_TYPE) in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED]: raise SuperdeskApiError.badRequestError(message="Invalid content type.") if item.get(ITEM_STATE) not in [CONTENT_STATE.CORRECTED, CONTENT_STATE.PUBLISHED]: raise SuperdeskApiError.badRequestError(message="Invalid content state.") def _get_broadcast_items(self, ids, include_archived_repo=False): """Returns list of broadcast items. Get the broadcast items for the master_id and takes_package_id :param list ids: list of item ids :param include_archived_repo True if archived repo needs to be included in search, default is False :return list: list of broadcast items """ query = { 'query': { 'filtered': { 'filter': { 'bool': { 'must': {'term': {'genre.name': BROADCAST_GENRE}}, 'should': [ {'terms': {'broadcast.master_id': ids}}, {'terms': {'broadcast.takes_package_id': ids}} ] } } } } } req = ParsedRequest() repos = 'archive,published' if include_archived_repo: repos = 'archive,published,archived' req.args = {'source': json.dumps(query), 'repo': repos} return get_resource_service('search').get(req=req, lookup=None) def get_broadcast_items_from_master_story(self, item, include_archived_repo=False): """Get the broadcast items from the master story. 
:param dict item: master story item :param include_archived_repo True if archived repo needs to be included in search, default is False :return list: returns list of broadcast items """ if is_genre(item, BROADCAST_GENRE): return [] ids = [str(item.get(config.ID_FIELD))] if self.takesService.get_take_package_id(item): ids.append(str(self.takesService.get_take_package_id(item))) return list(self._get_broadcast_items(ids, include_archived_repo)) def on_broadcast_master_updated(self, item_event, item, takes_package_id=None, rewrite_id=None): """Runs when master item is updated. This event is called when the master story is corrected, published, re-written, new take/re-opened :param str item_event: Item operations :param dict item: item on which operation performed. :param str takes_package_id: takes_package_id. :param str rewrite_id: re-written story id. """ status = '' if not item or is_genre(item, BROADCAST_GENRE): return if item_event == ITEM_CREATE and takes_package_id: if RE_OPENS.lower() in str(item.get('anpa_take_key', '')).lower(): status = 'Story Re-opened' else: status = 'New Take Created' elif item_event == ITEM_CREATE and rewrite_id: status = 'Master Story Re-written' elif item_event == ITEM_PUBLISH: status = 'Master Story Published' elif item_event == ITEM_CORRECT: status = 'Master Story Corrected' broadcast_items = self.get_broadcast_items_from_master_story(item) if not broadcast_items: return processed_ids = set() for broadcast_item in broadcast_items: try: if broadcast_item.get('lock_user'): continue updates = { 'broadcast': broadcast_item.get('broadcast'), } if status: updates['broadcast']['status'] = status if not updates['broadcast']['takes_package_id'] and takes_package_id: updates['broadcast']['takes_package_id'] = takes_package_id if not updates['broadcast']['rewrite_id'] and rewrite_id: updates['broadcast']['rewrite_id'] = rewrite_id if not broadcast_item.get(config.ID_FIELD) in processed_ids: self._update_broadcast_status(broadcast_item, updates) # list of ids that are processed. processed_ids.add(broadcast_item.get(config.ID_FIELD)) except: logger.exception('Failed to update status for the broadcast item {}'. format(broadcast_item.get(config.ID_FIELD))) def _update_broadcast_status(self, item, updates): """Update the status of the broadcast item. :param dict item: broadcast item to be updated :param dict updates: broadcast updates """ # update the published collection as well as archive. if item.get(ITEM_STATE) in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]: get_resource_service('published').update_published_items(item.get(config.ID_FIELD), 'broadcast', updates.get('broadcast')) archive_item = get_resource_service(SOURCE).find_one(req=None, _id=item.get(config.ID_FIELD)) get_resource_service(SOURCE).system_update(archive_item.get(config.ID_FIELD), updates, archive_item) def remove_rewrite_refs(self, item): """Remove the rewrite references from the broadcast item if the re-write is spiked. 
:param dict item: Re-written article of the original story """ if is_genre(item, BROADCAST_GENRE): return query = { 'query': { 'filtered': { 'filter': { 'and': [ {'term': {'genre.name': BROADCAST_GENRE}}, {'term': {'broadcast.rewrite_id': item.get(config.ID_FIELD)}} ] } } } } req = ParsedRequest() req.args = {'source': json.dumps(query)} broadcast_items = list(get_resource_service(SOURCE).get(req=req, lookup=None)) for broadcast_item in broadcast_items: try: updates = { 'broadcast': broadcast_item.get('broadcast', {}) } updates['broadcast']['rewrite_id'] = None if 'Re-written' in updates['broadcast']['status']: updates['broadcast']['status'] = '' self._update_broadcast_status(broadcast_item, updates) except: logger.exception('Failed to remove rewrite id for the broadcast item {}'. format(broadcast_item.get(config.ID_FIELD))) def reset_broadcast_status(self, updates, original): """Reset the broadcast status if the broadcast item is updated. :param dict updates: updates to the original document :param dict original: original document """ if original.get('broadcast') and original.get('broadcast').get('status', ''): broadcast_updates = { 'broadcast': original.get('broadcast'), } broadcast_updates['broadcast']['status'] = '' self._update_broadcast_status(original, broadcast_updates) updates.update(broadcast_updates) def spike_item(self, original): """If the original item is a re-write then remove the reference from the broadcast item. :param dict original: original document """ broadcast_items = [item for item in self.get_broadcast_items_from_master_story(original) if item.get(ITEM_STATE) not in PUBLISH_STATES] spike_service = get_resource_service('archive_spike') for item in broadcast_items: id_ = item.get(config.ID_FIELD) try: self.packageService.remove_spiked_refs_from_package(id_) updates = {ITEM_STATE: CONTENT_STATE.SPIKED} resolve_document_version(updates, SOURCE, 'PATCH', item) spike_service.patch(id_, updates) insert_into_versions(id_=id_) except: logger.exception("Failed to spike the related broadcast item {}.".format(id_)) if original.get('rewrite_of') and original.get(ITEM_STATE) not in PUBLISH_STATES: self.remove_rewrite_refs(original) def kill_broadcast(self, updates, original): """Kill the broadcast items of the master story. :param dict updates: updates for the master story :param dict original: master story being killed """ broadcast_items = [item for item in self.get_broadcast_items_from_master_story(original) if item.get(ITEM_STATE) in PUBLISH_STATES] correct_service = get_resource_service('archive_correct') kill_service = get_resource_service('archive_kill') for item in broadcast_items: item_id = item.get(config.ID_FIELD) packages = self.packageService.get_packages(item_id) processed_packages = set() for package in packages: if str(package[config.ID_FIELD]) in processed_packages: continue try: if package.get(ITEM_STATE) in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED}: package_updates = { config.LAST_UPDATED: utcnow(), GROUPS: self.packageService.remove_group_ref(package, item_id) } refs = self.packageService.get_residrefs(package_updates) if refs: correct_service.patch(package.get(config.ID_FIELD), package_updates) else: package_updates['body_html'] = updates.get('body_html', '') kill_service.patch(package.get(config.ID_FIELD), package_updates) processed_packages.add(package.get(config.ID_FIELD)) else: package_list = self.packageService.remove_refs_in_package(package, item_id, processed_packages) processed_packages = processed_packages.union(set(package_list)) except: logger.exception('Failed to remove the broadcast item {} from package {}'.format(item_id, package.get(config.ID_FIELD))) kill_service.kill_item(updates, item)
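# Illustrative summary of on_broadcast_master_updated() above; the mapping is
# a reference sketch, not used by the service:
BROADCAST_STATUS_BY_MASTER_EVENT = {
    (ITEM_CREATE, 'takes_package_id'): 'New Take Created',    # 'Story Re-opened' when anpa_take_key contains RE_OPENS
    (ITEM_CREATE, 'rewrite_id'): 'Master Story Re-written',
    (ITEM_PUBLISH, None): 'Master Story Published',
    (ITEM_CORRECT, None): 'Master Story Corrected',
}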