def remove_expired(self, doc):
    """
    Removes the expired published article from 'published' collection. Below is the workflow:
        1. If doc is a package then recursively move the items in the package to legal archive
           if the item wasn't moved before. And then run the package through the expiry workflow.
        2. Check if doc has expired. This is needed because when doc is a package and expired but
           the items in the package are not expired. If expired then update
           allow_post_publish_actions, can_be_removed flags.
        3. Insert/update the doc in Legal Archive repository
           (a) All references to master data like users, desks ... are de-normalized before
               inserting into Legal Archive. Same is done to each version of the article.
           (b) Inserts Transmission Details (fetched from publish_queue collection)
        4. If the doc has expired then remove the transmission details from Publish Queue collection.
        5. If the doc has expired and is eligible to be removed from production then remove the
           article and its versions from archive and archive_versions collections respectively.
        6. Removes the item from published collection, if can_be_removed is True

    :param doc: doc in 'published' collection
    """
    log_msg_format = "{{'_id': {item_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                     "'expired_on': {expiry}}}."
    log_msg = log_msg_format.format(**doc)
    # id field name used by the versions collection to reference the archive item
    version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
    can_be_removed = doc['can_be_removed']

    if not can_be_removed:
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:  # Step 1
            logging.info('Starting the workflow for removal of the expired package ' + log_msg)
            self._handle_expired_package(doc)

        logging.info('Starting the workflow for removal of the expired item ' + log_msg)
        # Step 2: a package may still be in 'published' while its items are not yet expired
        is_expired = doc['expiry'] <= utcnow()

        if is_expired:
            updates = self._update_flags(doc, log_msg)
            doc.update(updates)
            can_be_removed = updates.get('can_be_removed', can_be_removed)

        # Step 3: upsert into legal archive; returns the related publish queue entries
        publish_queue_items = self._upsert_into_legal_archive(doc, version_id_field, log_msg_format, log_msg)

        if is_expired:  # Step 4
            logging.info('Removing the transmission details for expired item ' + log_msg)
            for publish_queue_item in publish_queue_items:
                get_resource_service('publish_queue').delete_action(
                    lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

        if is_expired and self.can_remove_from_production(doc):  # Step 5
            logging.info('Removing the expired item from production ' + log_msg)
            # remove every version up to and including the expired one
            lookup = {'$and': [{version_id_field: doc['item_id']},
                               {config.VERSION: {'$lte': doc[config.VERSION]}}]}
            get_resource_service('archive_versions').delete(lookup)
            get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

    if can_be_removed:  # Step 6
        logging.info('Removing the expired item from published collection ' + log_msg)
        self.delete_by_article_id(_id=doc['item_id'], doc=doc)

    logging.info('Completed the workflow for removing the expired publish item ' + log_msg)
def _duplicate_versions(self, old_id, new_doc):
    """Copy the version trail of the article ``old_id`` onto a duplicated item.

    Every copied version has its identifying fields rewritten so that it
    belongs to ``new_doc``, and a snapshot of ``new_doc`` itself is appended
    as the newest version.

    :param old_id: identifier to fetch version history
    :param new_doc: identifiers from this doc will be used to create version history
                    for the duplicated item.
    """
    versions_service = get_resource_service('archive_versions')
    version_key = versioned_id_field(app.config['DOMAIN']['archive'])

    duplicated = []
    for entry in versions_service.get(req=None, lookup={'guid': old_id}):
        entry[version_key] = new_doc[config.ID_FIELD]
        del entry[config.ID_FIELD]
        entry['guid'] = new_doc['guid']
        entry['unique_name'] = new_doc['unique_name']
        entry['unique_id'] = new_doc['unique_id']
        entry['versioncreated'] = utcnow()
        if entry[VERSION] == new_doc[VERSION]:
            entry[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
        duplicated.append(entry)

    # store the duplicated doc itself as the latest version entry
    snapshot = deepcopy(new_doc)
    snapshot['_id_document'] = new_doc['_id']
    del snapshot['_id']
    duplicated.append(snapshot)

    if duplicated:
        versions_service.post(duplicated)
def _duplicate_versions(self, old_id, new_doc):
    """Clone the version history of ``old_id`` for a duplicated article.

    Identifier fields on each cloned version are rewritten to point at
    ``new_doc``; finally a copy of ``new_doc`` itself is stored as the
    latest version.

    :param old_id: identifier to fetch version history
    :param new_doc: identifiers from this doc will be used to create version history
                    for the duplicated item.
    """
    history = get_resource_service("archive_versions").get(req=None, lookup={"guid": old_id})
    id_key = versioned_id_field()

    cloned = []
    for version in history:
        version[id_key] = new_doc[config.ID_FIELD]
        del version[config.ID_FIELD]
        version["guid"] = new_doc["guid"]
        version["unique_name"] = new_doc["unique_name"]
        version["unique_id"] = new_doc["unique_id"]
        version["versioncreated"] = utcnow()
        if version[VERSION] == new_doc[VERSION]:
            version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
        cloned.append(version)

    newest = deepcopy(new_doc)
    newest["_id_document"] = new_doc["_id"]
    del newest["_id"]
    cloned.append(newest)

    if cloned:
        get_resource_service("archive_versions").post(cloned)
def delete_by_article_ids(self, ids):
    """Purge the given articles together with their version history.

    :param list ids: list of ids to be removed
    """
    versions_resource = app.config['DOMAIN']['archive_versions']
    get_resource_service('archive_versions').delete(
        lookup={versioned_id_field(versions_resource): {'$in': ids}})
    super().delete_action({config.ID_FIELD: {'$in': ids}})
def remove_expired(self, doc):
    """Delete a spiked article and its versions from production.

    Only articles in the spiked state are eligible; anything else trips
    the assertion below.
    """
    state = doc[ITEM_STATE]
    assert state == CONTENT_STATE.SPIKED, \
        "Article state is %s. Only Spiked Articles can be removed" % state

    item_id = str(doc[config.ID_FIELD])
    super().delete_action({config.ID_FIELD: item_id})
    get_resource_service('archive_versions').delete(lookup={versioned_id_field(): item_id})
def get(self, req, lookup):
    # Restrict the query to non-canceled items, then expose the versioned
    # id under the regular id field so clients see the original item id.
    id_field = versioned_id_field(app.config['DOMAIN']['items'])
    filtered = {'$and': [lookup, {'pubstatus': {'$ne': 'canceled'}}]}
    docs = list(super().get_from_mongo(req=req, lookup=filtered))
    for entry in docs:
        entry[config.ID_FIELD] = entry[id_field]
    return ListCursor(docs)
def _get_legal_archive_details(self, article_id, publishing_action=None):
    # Pull the article, its version trail and its transmission queue entries
    # from the legal archive repositories.
    article = get_resource_service(LEGAL_ARCHIVE_NAME).find_one(_id=article_id, req=None)
    article_versions = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME).get(
        req=None, lookup={versioned_id_field(): article_id})

    queue_lookup = {'item_id': article_id}
    if publishing_action:
        queue_lookup['publishing_action'] = publishing_action
    queue_items = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME).get(req=None, lookup=queue_lookup)

    return article, article_versions, queue_items
def remove_expired(self, doc):
    """Delete a spiked article and its version history from production."""
    assert doc[ITEM_STATE] == CONTENT_STATE.SPIKED, \
        "Article state is %s. Only Spiked Articles can be removed" % doc[ITEM_STATE]

    item_id = str(doc[config.ID_FIELD])
    version_key = versioned_id_field(app.config['DOMAIN']['archive_versions'])
    get_resource_service('archive_versions').delete(lookup={version_key: item_id})
    super().delete_action({config.ID_FIELD: item_id})
def get(self, req, lookup):
    # When a specific _id is passed via query args, fetch the history by the
    # versioned id field; otherwise honour the caller-supplied lookup.
    id_field = versioned_id_field(app.config['DOMAIN']['items'])
    requested_id = req.args.get(config.ID_FIELD) if req and req.args else None
    if requested_id:
        history = list(super().get_from_mongo(req=ParsedRequest(),
                                              lookup={id_field: requested_id}))
    else:
        history = list(super().get_from_mongo(req=req, lookup=lookup))
    # surface the original item id under the regular id field
    for entry in history:
        entry[config.ID_FIELD] = entry[id_field]
    return ListCursor(history)
def remove_expired(self, doc):
    """
    Removes the expired published article from 'published' collection. Below is the workflow:
        1. Update allow_post_publish_actions, can_be_removed flags.
        2. Inserts/updates the article in Legal Archive repository
           (a) All references to master data like users, desks ... is de-normalized and then
               inserted into Legal Archive. Same is done to each version of the article.
           (b) Inserts Formatted Items
           (c) Inserts Transmission Details (fetched from publish_queue collection)
        3. Removes the item from publish_queue and published collections, if can_be_removed is True
        4. Remove the article and its versions from archive collection if all of the below
           conditions are met:
           (a) Article hasn't been published/corrected/killed again
           (b) Article isn't part of a package

    :param doc: doc in 'published' collection
    """
    logging.info("Starting the workflow for removing the expired publish item with id: %s" % doc['item_id'])
    can_be_removed = doc['can_be_removed']

    if not can_be_removed:
        # Step 1
        updates = self._update_flags(doc)
        doc.update(updates)
        can_be_removed = updates.get('can_be_removed', can_be_removed)

        # Step 2: upsert into legal archive; returns the related publish queue entries
        publish_queue_items = self._upsert_into_legal_archive(doc)
        for publish_queue_item in publish_queue_items:
            get_resource_service('publish_queue').delete_action(
                lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

        # Step 4
        if self.can_remove_from_production(doc):
            resource_def = app.config['DOMAIN']['archive']
            # remove every version up to and including the expired one
            lookup = {'$and': [{versioned_id_field(resource_def): doc['item_id']},
                               {config.VERSION: {'$lte': doc[config.VERSION]}}]}
            get_resource_service('archive_versions').delete(lookup)
            get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

    if can_be_removed:
        # Step 3
        self.delete_by_article_id(_id=doc['item_id'], doc=doc)

    logging.info("Completed the workflow for removing the expired publish item with id: %s" % doc['item_id'])
def _get_legal_archive_details(self, article_id, publishing_action=None):
    # Fetch the article, its version history and its publish queue entries
    # from the legal archive repositories.
    article = get_resource_service(LEGAL_ARCHIVE_NAME).find_one(_id=article_id, req=None)

    version_key = versioned_id_field(self.app.config['DOMAIN'][LEGAL_ARCHIVE_VERSIONS_NAME])
    article_versions = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME).get(
        req=None, lookup={version_key: article_id})

    queue_lookup = {'item_id': article_id}
    if publishing_action:
        queue_lookup['publishing_action'] = publishing_action
    queue_items = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME).get(req=None, lookup=queue_lookup)

    return article, article_versions, queue_items
def remove_expired(self, doc):
    """
    Removes the expired published article from 'published' collection. Below is the workflow:
        1. If type of the article is either text or pre-formatted then a copy is inserted
           into Text Archive
        2. Inserts/updates the article in Legal Archive repository
           (a) All references to master data like users, desks, destination groups... is
               de-normalized and then inserted into Legal Archive. Same is done to each
               version of the article.
           (b) Inserts Formatted Items
           (c) Inserts Transmission Details (fetched from publish_queue collection)
        3. Removes the item from formatted_item, publish_queue and published collections
        4. Remove the article and its versions from archive collection if all of the below
           conditions are met:
           (a) Article hasn't been published/corrected/killed again
           (b) Article isn't part of a package

    :param doc: doc in 'published' collection
    """
    logging.info("Starting the workflow for removing the expired publish item with id: %s" % doc['item_id'])

    # Step 1
    if 'type' in doc and doc['type'] in ['text', 'preformatted']:
        self._insert_into_or_remove_from_text_archive(doc)

    # Step 2: upsert into legal archive; returns the related formatted items and queue entries
    formatted_item_ids, publish_queue_items = self._upsert_into_legal_archive(doc)
    for formatted_item_id in formatted_item_ids:
        get_resource_service('formatted_item').delete_action(lookup={config.ID_FIELD: formatted_item_id})
    for publish_queue_item in publish_queue_items:
        get_resource_service('publish_queue').delete_action(
            lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

    # Step 3
    self.delete_by_article_id(_id=doc['item_id'], doc=doc)

    # Step 4: only purge from archive when no other published copies exist
    # and the article is an orphan (see docstring conditions 4a/4b)
    items = self.get_other_published_items(doc['item_id'])
    if items.count() == 0 and self.__is_orphan(doc):
        # remove every version up to and including the expired one
        lookup = {'$and': [{versioned_id_field(): doc['item_id']},
                           {config.VERSION: {'$lte': doc[config.VERSION]}}]}
        get_resource_service('archive_versions').delete(lookup)
        get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

    logging.info("Completed the workflow for removing the expired publish item with id: %s" % doc['item_id'])
def get(self, req, lookup):
    """
    Version of an article in Legal Archive isn't maintained by Eve. Overriding this
    to fetch the version history.
    """
    id_field = versioned_id_field(app.config['DOMAIN'][LEGAL_ARCHIVE_NAME])
    requested_id = req.args.get(config.ID_FIELD) if req and req.args else None
    if requested_id:
        history = list(super().get_from_mongo(req=ParsedRequest(),
                                              lookup={id_field: requested_id}))
    else:
        history = list(super().get_from_mongo(req=req, lookup=lookup))
    # expose the original item id and enrich each version for the client
    for entry in history:
        entry[config.ID_FIELD] = entry[id_field]
        self.enhance(entry)
    return ListCursor(history)
def _duplicate_history(self, old_id, new_doc):
    """Duplicates history for an item.

    Duplicates the history of the article identified by old_id. Each history
    entry is re-pointed at new_doc before being stored.

    :param old_id: identifier to fetch history
    :param new_doc: identifiers from this doc will be used to create version history
                    for the duplicated item.
    """
    version_key = versioned_id_field(app.config['DOMAIN']['archive'])
    copies = []
    for entry in get_resource_service('archive_history').get(req=None, lookup={'item_id': old_id}):
        entry[version_key] = new_doc[config.ID_FIELD]
        del entry[config.ID_FIELD]
        entry['item_id'] = new_doc['guid']
        copies.append(entry)
    if copies:
        get_resource_service('archive_history').post(copies)
def get_version(self, id, version, formatter_name):
    # Resolve the requested formatter first; an unknown name is a 404.
    formatter = self._get_formatter(formatter_name)
    if not formatter:
        abort(404)

    if version:
        item = get_resource_service('items_versions').find_one(
            req=None, _id_document=id, version=version)
        if not item:
            abort(404)
        # expose the versioned id under the regular _id field
        item['_id'] = item[versioned_id_field(app.config['DOMAIN']['items'])]
    else:
        item = get_resource_service('items').find_one(req=None, _id=id)
        if not item:
            abort(404)

    # Ensure that the item has not expired under the configured time limit
    cutoff = utcnow() - timedelta(days=int(get_setting('news_api_time_limit_days')))
    if cutoff > item.get('versioncreated', utcnow()):
        abort(404)

    return {'formatted_item': formatter.format_item(item), 'mimetype': formatter.MIMETYPE}
def getitem(resource, **lookup):
    """
    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.6
       Handle soft deleted documents

    .. versionchanged:: 0.5
       Allow ``?version=all`` requests to fire ``on_fetched_*`` events.
       Create pagination links for document versions. (#475)
       Pagination links reflect current query. (#464)

    .. versionchanged:: 0.4
       HATOEAS link for contains the business unit value even when regexes
       have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` changed to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embeded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged: 0.0.8
       'on_getting_item' event is raised when a document has been read from
       the database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
       ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they
       were created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superflous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    soft_delete_enabled = config.DOMAIN[resource]['soft_delete']
    if soft_delete_enabled:
        # GET requests should always fetch soft deleted documents from the db
        # They are handled and included in 404 responses below.
        req.show_deleted = True

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None
    cursor = None

    # calculate last_modified before get_old_document rolls back the document,
    # allowing us to invalidate the cache when _latest_version changes
    last_modified = last_updated(document)

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = document
        document = get_old_document(resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)
    if config.IF_MATCH:
        etag = document[config.ETAG]

    # check embedded fields resolved in build_response_document() for more
    # recent last updated timestamps. We don't want to respond 304 if embedded
    # fields have changed
    for field in embedded_fields:
        embedded_document = document.get(field)
        if isinstance(embedded_document, dict):
            embedded_last_updated = last_updated(embedded_document)
            if embedded_last_updated > last_modified:
                last_modified = embedded_last_updated

    # facilitate client caching by returning a 304 when appropriate
    cache_validators = {True: 0, False: 0}

    if req.if_modified_since:
        cache_valid = (last_modified <= req.if_modified_since)
        cache_validators[cache_valid] += 1

    if req.if_none_match:
        # etag-based caching only applies when serving the latest version
        if (resource_def['versioning'] is False) or \
                (document[app.config['VERSION']] ==
                 document[app.config['LATEST_VERSION']]):
            cache_valid = (etag == req.if_none_match)
            cache_validators[cache_valid] += 1

    # If all cache validators are true, return 304
    if (cache_validators[True] > 0) and (cache_validators[False] == 0):
        return {}, last_modified, etag, 304

    if version == 'all' or version == 'diffs':
        # find all versions: switch the lookup to the versions collection's
        # id field
        lookup[versioned_id_field()] = lookup[app.config['ID_FIELD']]
        del lookup[app.config['ID_FIELD']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(diff_document(
                            resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    elif soft_delete_enabled and document.get(config.DELETED) is True:
        # This document was soft deleted. Respond with 404 and the deleted
        # version of the document.
        # NOTE(review): the trailing comma below makes STATUS a 1-tuple —
        # looks unintended; verify against upstream before changing.
        document[config.STATUS] = config.STATUS_ERR,
        document[config.ERROR] = {
            'code': 404,
            'message': 'The requested URL was not found on this server.'
        }
        return document, last_modified, etag, 404
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        # use the id of the latest document for multi-document requests
        if cursor:
            count = cursor.count(with_limit_and_skip=False)
            response[config.LINKS] = \
                _pagination_links(resource, req, count,
                                  latest_doc[config.ID_FIELD])
            if config.DOMAIN[resource]['pagination']:
                response[config.META] = _meta_links(req, count)
        else:
            response[config.LINKS] = \
                _pagination_links(resource, req, None,
                                  response[config.ID_FIELD])

    # callbacks not supported on version diffs because of partial documents
    if version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should
        # the functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        if resource_def['versioning'] is True and version == 'all':
            versions = response
            if config.DOMAIN[resource]['hateoas']:
                versions = response[config.ITEMS]
            for version_item in versions:
                getattr(app, "on_fetched_item")(resource, version_item)
                getattr(app, "on_fetched_item_%s" % resource)(version_item)
        else:
            getattr(app, "on_fetched_item")(resource, response)
            getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
def getitem(resource, **lookup):
    """
    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.4
       HATOEAS link for contains the business unit value even when regexes
       have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` changed to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embeded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged: 0.0.8
       'on_getting_item' event is raised when a document has been read from
       the database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
       ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they
       were created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superflous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = copy.deepcopy(document)
        document = get_old_document(resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)

    # last_modified for the response
    last_modified = document[config.LAST_UPDATED]

    # facilitate client caching by returning a 304 when appropriate
    if config.IF_MATCH:
        etag = document[config.ETAG]

        if req.if_none_match and etag == req.if_none_match:
            # request etag matches the current server representation of the
            # document, return a 304 Not-Modified.
            return {}, last_modified, document[config.ETAG], 304

    if req.if_modified_since and last_modified <= req.if_modified_since:
        # request If-Modified-Since conditional request match. We test
        # this after the etag since Last-Modified dates have lower
        # resolution (1 second).
        return {}, last_modified, document.get(config.ETAG), 304

    if version == 'all' or version == 'diffs':
        # find all versions: switch the lookup to the versions collection's
        # id field
        lookup[versioned_id_field()] = lookup[app.config['ID_FIELD']]
        del lookup[app.config['ID_FIELD']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(diff_document(
                            resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        if config.LINKS not in response:
            response[config.LINKS] = {}
        response[config.LINKS]['collection'] = {
            'title': config.DOMAIN[resource]['resource_title'],
            'href': resource_link()}
        response[config.LINKS]['parent'] = home_link()

    if version != 'all' and version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should the
        # functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        getattr(app, "on_fetched_item")(resource, response)
        getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
def upsert_into_legal_archive(self, item_id):
    """
    Once publish actions are performed on the article do the below:
        1. Get legal archive article.
        2. De-normalize the expired article
        3. Upserting Legal Archive.
        4. Get Version History and De-normalize and Inserting Legal Archive Versions

    :param dict item_id: id of the document from 'archive' collection.
    """
    try:
        logger.warning('Import item into legal {}.'.format(item_id))

        doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)
        if not doc:
            logger.error('Could not find the document {} to import to legal archive.'.format(item_id))
            return

        # setting default values in case they are missing other log message will fail.
        doc.setdefault('unique_name', 'NO UNIQUE NAME')
        doc.setdefault(config.VERSION, 1)
        doc.setdefault('expiry', utcnow())

        if not doc.get(ITEM_STATE) in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                                       CONTENT_STATE.KILLED}:
            # at times we have seen that item is published but the item is different in the
            # archive collection this will notify admins about the issue but proceed to move
            # the item into legal archive.
            msg = 'Invalid state: {}. Moving the item to legal archive. item: {}'.\
                format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc))
            logger.error(msg)
            # required for behave test.
            update_notifiers(ACTIVITY_ERROR, msg=msg, resource=ARCHIVE)

        legal_archive_doc = deepcopy(doc)
        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)

        log_msg = self.log_msg_format.format(**legal_archive_doc)
        version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
        logger.info('Preparing Article to be inserted into Legal Archive ' + log_msg)

        # Removing irrelevant properties
        legal_archive_doc.pop(config.ETAG, None)
        legal_archive_doc.pop('lock_user', None)
        legal_archive_doc.pop('lock_session', None)
        legal_archive_doc.pop('lock_time', None)
        logger.info('Removed irrelevant properties from the article {}'.format(log_msg))

        # Step 1
        article_in_legal_archive = legal_archive_service.find_one(
            req=None, _id=legal_archive_doc[config.ID_FIELD])

        # skip the upsert when legal archive already holds a newer version
        if article_in_legal_archive and \
                article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(config.VERSION):
            logger.info('Item {} version: {} already in legal archive. Legal Archive document version {}'
                        .format(legal_archive_doc.get(config.ID_FIELD),
                                legal_archive_doc.get(config.VERSION),
                                article_in_legal_archive.get(config.VERSION)))
            self._set_moved_to_legal(doc)
            return

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc, log_msg)
        logger.info('De-normalized article {}'.format(log_msg))

        # Step 3 - Upserting Legal Archive
        logger.info('Upserting Legal Archive Repo with article {}'.format(log_msg))
        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 4 - Get Version History and De-normalize and Inserting Legal Archive Versions
        lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
        version_history = list(get_resource_service('archive_versions').get(req=None, lookup=lookup))
        legal_version_history = list(legal_archive_versions_service.get(req=None, lookup=lookup))
        logger.info('Fetched version history for article {}'.format(log_msg))
        # only insert versions legal archive doesn't already have
        versions_to_insert = [version for version in version_history
                              if not any(legal_version for legal_version in legal_version_history
                                         if version[config.VERSION] == legal_version[config.VERSION])]

        # This happens when user kills an article from Dusty Archive
        if article_in_legal_archive and \
                article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION] and \
                len(versions_to_insert) == 0:
            resource_def = app.config['DOMAIN'][ARCHIVE]
            versioned_doc = deepcopy(legal_archive_doc)
            versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
            versioned_doc[config.ID_FIELD] = ObjectId()
            versions_to_insert.append(versioned_doc)

        for version_doc in versions_to_insert:
            self._denormalize_user_desk(
                version_doc,
                self.log_msg_format.format(_id=version_doc[version_id_field],
                                           unique_name=version_doc.get('unique_name'),
                                           _current_version=version_doc[config.VERSION],
                                           expiry=version_doc.get('expiry')))
            version_doc.pop(config.ETAG, None)

        if versions_to_insert:
            legal_archive_versions_service.post(versions_to_insert)
            logger.info('Inserted de-normalized version history for article {}'.format(log_msg))

        # Set the flag that item is moved to legal.
        self._set_moved_to_legal(doc)

        logger.info('Upsert completed for article ' + log_msg)
    # NOTE(review): bare except catches BaseException (incl. SystemExit); it does
    # re-raise, but `except Exception:` would be the conventional form — confirm
    # before changing.
    except:
        logger.exception('Failed to import into legal archive {}.'.format(item_id))
        raise
def getitem(resource, **lookup):
    """ Fetch and render a single document, honoring conditional requests
    (ETag / If-Modified-Since) and, when versioning is enabled, the
    ``?version=`` query parameter (a specific version, ``all``, or ``diffs``).

    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    :returns: tuple of (response document, last_modified, etag, status).

    .. versionchanged:: 0.4
       HATEOAS link for contains the business unit value even when regexes
       have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` changed to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embeded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged: 0.0.8
       'on_getting_item' event is raised when a document has been read from the
       database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
       ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they
       were created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superflous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    # requested document version: None, a number, 'all', or 'diffs'
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        # keep the latest around: old versions are synthesized against it
        latest_doc = copy.deepcopy(document)
        document = get_old_document(
            resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)

    # last_modified for the response
    last_modified = document[config.LAST_UPDATED]

    # facilitate client caching by returning a 304 when appropriate
    if config.IF_MATCH:
        etag = document[config.ETAG]

        if req.if_none_match and etag == req.if_none_match:
            # request etag matches the current server representation of the
            # document, return a 304 Not-Modified.
            return {}, last_modified, document[config.ETAG], 304

    if req.if_modified_since and last_modified <= req.if_modified_since:
        # request If-Modified-Since conditional request match. We test
        # this after the etag since Last-Modified dates have lower
        # resolution (1 second).
        return {}, last_modified, document.get(config.ETAG), 304

    if version == 'all' or version == 'diffs':
        # find all versions: rewrite the lookup to query the shadow
        # versions collection by the versioned id field instead
        lookup[versioned_id_field()] = lookup[app.config['ID_FIELD']]
        del lookup[app.config['ID_FIELD']]

        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        # only ship the delta against the previous version
                        documents.append(
                            diff_document(
                                resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        if config.LINKS not in response:
            response[config.LINKS] = {}
        response[config.LINKS]['collection'] = {
            'title': config.DOMAIN[resource]['resource_title'],
            'href': resource_link()}
        response[config.LINKS]['parent'] = home_link()

    if version != 'all' and version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should the
        # functions modify the document, last_modified and etag won't be
        # updated to reflect the changes (they always reflect the document's
        # state on the database).
        getattr(app, "on_fetched_item")(resource, response)
        getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
def upsert_into_legal_archive(self, item_id):
    """Move a published/corrected/killed article into the Legal Archive.

    Once publish actions are performed on the article do the below:
        1. Get legal archive article.
        2. De-normalize the expired article
        3. Upserting Legal Archive.
        4. Get Version History and De-normalize and Inserting Legal Archive Versions
        5. Get History and de-normalize and insert into Legal Archive History

    :param dict item_id: id of the document from 'archive' collection.
    :raises: re-raises any exception after logging it, so callers can retry.
    """
    try:
        logger.info('Import item into legal {}.'.format(item_id))

        doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)
        if not doc:
            logger.error('Could not find the document {} to import to legal archive.'.format(item_id))
            return

        # setting default values in case they are missing other log message will fail.
        doc.setdefault('unique_name', 'NO UNIQUE NAME')
        doc.setdefault(config.VERSION, 1)
        doc.setdefault('expiry', utcnow())

        if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
            # at times we have seen that item is published but the item is different in the archive collection
            # this will notify admins about the issue but proceed to move the item into legal archive.
            msg = 'Invalid state: {}. Moving the item to legal archive. item: {}'.\
                format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc))
            logger.error(msg)
            update_notifiers(ACTIVITY_ERROR, msg=msg, resource=ARCHIVE)  # required for behave test.

        legal_archive_doc = deepcopy(doc)
        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
        legal_archive_history_service = get_resource_service(LEGAL_ARCHIVE_HISTORY_NAME)

        log_msg = self.log_msg_format.format(**legal_archive_doc)
        version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
        logger.info('Preparing Article to be inserted into Legal Archive ' + log_msg)

        # Removing irrelevant properties
        legal_archive_doc.pop(config.ETAG, None)
        legal_archive_doc.pop('lock_user', None)
        legal_archive_doc.pop('lock_session', None)
        legal_archive_doc.pop('lock_time', None)
        legal_archive_doc.pop('lock_action', None)

        logger.info('Removed irrelevant properties from the article {}'.format(log_msg))

        # Step 1
        article_in_legal_archive = legal_archive_service.find_one(req=None,
                                                                  _id=legal_archive_doc[config.ID_FIELD])

        if article_in_legal_archive and \
                article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(config.VERSION):
            # legal archive already holds a newer version; just flag the item as moved.
            logger.info('Item {} version: {} already in legal archive. Legal Archive document version {}'.format(
                legal_archive_doc.get(config.ID_FIELD),
                legal_archive_doc.get(config.VERSION),
                article_in_legal_archive.get(config.VERSION)
            ))
            self._set_moved_to_legal(doc)
            return

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc, log_msg)
        logger.info('De-normalized article {}'.format(log_msg))

        # Step 3 - Upserting Legal Archive
        logger.info('Upserting Legal Archive Repo with article {}'.format(log_msg))

        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 4 - Get Versions and De-normalize and Inserting Legal Archive Versions
        lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
        versions = list(get_resource_service('archive_versions').get(req=None, lookup=lookup))
        legal_versions = list(legal_archive_versions_service.get(req=None, lookup=lookup))
        logger.info('Fetched version history for article {}'.format(log_msg))

        # only insert versions that are not yet present in the legal archive
        versions_to_insert = [version for version in versions
                              if not any(legal_version for legal_version in legal_versions
                                         if version[config.VERSION] == legal_version[config.VERSION])]

        # Step 5 - Get History and de-normalize and insert into Legal Archive History
        lookup = {'item_id': legal_archive_doc[config.ID_FIELD]}
        history_items = list(get_resource_service('archive_history').get(req=None, lookup=lookup))
        legal_history_items = list(legal_archive_history_service.get(req=None, lookup=lookup))
        logger.info('Fetched history for article {}'.format(log_msg))

        # only insert history entries not yet present in the legal archive
        history_to_insert = [history for history in history_items
                             if not any(legal_version for legal_version in legal_history_items
                                        if history[config.ID_FIELD] == legal_version[config.ID_FIELD])]

        # This happens when user kills an article from Dusty Archive
        if article_in_legal_archive and \
                article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION] and \
                len(versions_to_insert) == 0:
            # synthesize a version document for the new head version
            resource_def = app.config['DOMAIN'][ARCHIVE]
            versioned_doc = deepcopy(legal_archive_doc)
            versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
            versioned_doc[config.ID_FIELD] = ObjectId()
            versions_to_insert.append(versioned_doc)

        for version_doc in versions_to_insert:
            self._denormalize_user_desk(version_doc,
                                        self.log_msg_format.format(_id=version_doc[version_id_field],
                                                                   unique_name=version_doc.get('unique_name'),
                                                                   _current_version=version_doc[config.VERSION],
                                                                   expiry=version_doc.get('expiry')))
            version_doc.pop(config.ETAG, None)

        if versions_to_insert:
            legal_archive_versions_service.post(versions_to_insert)
            logger.info('Inserted de-normalized versions for article {}'.format(log_msg))

        for history_doc in history_to_insert:
            self._denormalize_history(history_doc)
            history_doc.pop(config.ETAG, None)

        if history_to_insert:
            legal_archive_history_service.post(history_to_insert)
            logger.info('Inserted de-normalized history for article {}'.format(log_msg))

        # Set the flag that item is moved to legal.
        self._set_moved_to_legal(doc)
        logger.info('Upsert completed for article ' + log_msg)
    except Exception:
        # was a bare `except:` which also trapped SystemExit/KeyboardInterrupt;
        # narrow it, log with traceback and re-raise so the caller can retry.
        logger.exception('Failed to import into legal archive {}.'.format(item_id))
        raise
def upsert_into_legal_archive(self, item_id):
    """Move a published/corrected/killed article into the Legal Archive.

    Once publish actions are performed on the article do the below:
        1. Get legal archive article.
        2. De-normalize the expired article
        3. Upserting Legal Archive.
        4. Get Version History and De-normalize and Inserting Legal Archive Versions

    :param dict item_id: id of the document from 'archive' collection.
    :raises: re-raises any exception after logging it, so callers can retry.
    """
    try:
        logger.warning("Import item into legal {}.".format(item_id))

        doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)
        if not doc:
            logger.error("Could not find the document {} to import to legal archive.".format(item_id))
            return

        # only terminal publish states are eligible for the legal archive
        if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
            logger.error(
                "Invalid state: {}. Cannot move the item to legal archive. item: {}".format(
                    doc.get(ITEM_STATE), self.log_msg_format.format(**doc)
                )
            )
            return

        # required for behave test.
        legal_archive_doc = deepcopy(doc)
        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)

        log_msg = self.log_msg_format.format(**legal_archive_doc)
        version_id_field = versioned_id_field(app.config["DOMAIN"][ARCHIVE])
        logger.info("Preparing Article to be inserted into Legal Archive " + log_msg)

        # Removing irrelevant properties
        legal_archive_doc.pop(config.ETAG, None)
        legal_archive_doc.pop("lock_user", None)
        legal_archive_doc.pop("lock_session", None)
        legal_archive_doc.pop("lock_time", None)

        logger.info("Removed irrelevant properties from the article {}".format(log_msg))

        # Step 1
        article_in_legal_archive = legal_archive_service.find_one(req=None, _id=legal_archive_doc[config.ID_FIELD])

        if article_in_legal_archive and article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(
            config.VERSION
        ):
            # legal archive already holds a newer version; just flag the item as moved.
            logger.info(
                "Item {} version: {} already in legal archive. Legal Archive document version {}".format(
                    legal_archive_doc.get(config.ID_FIELD),
                    legal_archive_doc.get(config.VERSION),
                    article_in_legal_archive.get(config.VERSION),
                )
            )
            self._set_moved_to_legal(doc)
            return

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc, log_msg)
        logger.info("De-normalized article {}".format(log_msg))

        # Step 3 - Upserting Legal Archive
        logger.info("Upserting Legal Archive Repo with article {}".format(log_msg))

        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 4 - Get Version History and De-normalize and Inserting Legal Archive Versions
        lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
        version_history = list(get_resource_service("archive_versions").get(req=None, lookup=lookup))
        legal_version_history = list(legal_archive_versions_service.get(req=None, lookup=lookup))
        logger.info("Fetched version history for article {}".format(log_msg))

        # only insert versions that are not yet present in the legal archive
        versions_to_insert = [
            version
            for version in version_history
            if not any(
                legal_version
                for legal_version in legal_version_history
                if version[config.VERSION] == legal_version[config.VERSION]
            )
        ]

        # This happens when user kills an article from Dusty Archive
        if (
            article_in_legal_archive
            and article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION]
            and len(versions_to_insert) == 0
        ):
            # synthesize a version document for the new head version
            resource_def = app.config["DOMAIN"][ARCHIVE]
            versioned_doc = deepcopy(legal_archive_doc)
            versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
            versioned_doc[config.ID_FIELD] = ObjectId()
            versions_to_insert.append(versioned_doc)

        for version_doc in versions_to_insert:
            self._denormalize_user_desk(
                version_doc,
                self.log_msg_format.format(
                    _id=version_doc[version_id_field],
                    unique_name=version_doc.get("unique_name"),
                    _current_version=version_doc[config.VERSION],
                    expiry=version_doc.get("expiry"),
                ),
            )
            version_doc.pop(config.ETAG, None)

        if versions_to_insert:
            legal_archive_versions_service.post(versions_to_insert)
            logger.info("Inserted de-normalized version history for article {}".format(log_msg))

        # Set the flag that item is moved to legal.
        self._set_moved_to_legal(doc)
        logger.info("Upsert completed for article " + log_msg)
    except Exception:
        # was a bare `except:` which also trapped SystemExit/KeyboardInterrupt;
        # narrow it, log with traceback and re-raise so the caller can retry.
        logger.exception("Failed to import into legal archive {}.".format(item_id))
        raise
def _init_article_versions(self):
    """Build four successive versions of a single fixture article for the
    archive_versions collection.

    All versions share the same guid/versioned id ('1'); config.VERSION runs
    1..4 while fields accumulate: v2 adds the headline, v3 adds the ednote,
    v4 adds the body. States progress draft -> submitted -> in_progress.

    :return: list of four version dicts.
    """
    resource_def = self.app.config['DOMAIN']['archive_versions']
    version_id = versioned_id_field(resource_def)

    def _version(current_version, state, extra):
        # Fields common to every version; per-version fields come via `extra`.
        doc = {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
               version_id: '1',
               ITEM_TYPE: CONTENT_TYPE.TEXT,
               config.VERSION: current_version,
               'urgency': 4,
               'pubstatus': 'usable',
               'firstcreated': utcnow(),
               'byline': 'By Alan Karben',
               'dateline': {'located': {'city': 'Sydney'}},
               'keywords': ['Student', 'Crime', 'Police', 'Missing'],
               'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                           {'qcode': '04001002', 'name': 'Weather'}],
               ITEM_STATE: state,
               'expiry': utcnow() + timedelta(minutes=20),
               'unique_name': '#8'}
        doc.update(extra)
        return doc

    return [_version(1, CONTENT_STATE.DRAFT, {}),
            _version(2, CONTENT_STATE.SUBMITTED,
                     {'headline': 'Two students missing'}),
            _version(3, CONTENT_STATE.PROGRESS,
                     {'headline': 'Two students missing',
                      'ednote': 'Andrew Marwood contributed to this article'}),
            _version(4, CONTENT_STATE.PROGRESS,
                     {'headline': 'Two students missing',
                      'ednote': 'Andrew Marwood contributed to this article',
                      'body_html': 'Test body'})]
def deleteitem_internal(
        resource, concurrency_check=False, suppress_callbacks=False, **lookup):
    """ Intended for internal delete calls, this method is not rate limited,
    authentication is not checked, pre-request events are not raised, and
    concurrency checking is optional. Deletes a resource item.

    When the resource has ``soft_delete`` enabled the document is replaced by
    a copy flagged with ``config.DELETED`` instead of being removed; otherwise
    the document, its media files and its version history are removed for real.

    :param resource: name of the resource to which the item(s) belong.
    :param concurrency_check: concurrency check switch (bool)
    :param suppress_callbacks: when True, skip on_delete_item/on_deleted_item
                               event hooks.
    :param **lookup: item lookup query.

    :returns: tuple of ({}, None, None, 204) — an empty 204 No Content.

    .. versionchanged:: 0.6
       Support for soft delete.

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.
       Push updates to OpLog.
       Original deleteitem() has been split into deleteitem() and
       deleteitem_internal().

    .. versionchanged:: 0.4
       Fix #284: If you have a media field, and set datasource projection to
       0 for that field, the media will not be deleted.
       Support for document versioning.
       'on_delete_item' events raised before performing the delete.
       'on_deleted_item' events raised after performing the delete.

    .. versionchanged:: 0.3
       Delete media files as needed.
       Pass the explicit query filter to the data driver, as it does not
       support the id argument anymore.

    .. versionchanged:: 0.2
       Raise pre_<method> event.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.5
       Pass current resource to ``parse_request``, allowing for proper
       processing of new configuration settings: `filters`, `sorting`,
       `paging`.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.
    """
    resource_def = config.DOMAIN[resource]
    soft_delete_enabled = resource_def['soft_delete']
    original = get_document(resource, concurrency_check, **lookup)
    # a soft-deleted document must look like a 404 to clients
    if not original or (soft_delete_enabled and
                        original.get(config.DELETED) is True):
        abort(404)

    # notify callbacks
    if suppress_callbacks is not True:
        getattr(app, "on_delete_item")(resource, original)
        getattr(app, "on_delete_item_%s" % resource)(original)

    if soft_delete_enabled:
        # Instead of removing the document from the db, just mark it as deleted
        marked_document = copy.deepcopy(original)

        # Set DELETED flag and update metadata
        last_modified = datetime.utcnow().replace(microsecond=0)
        marked_document[config.DELETED] = True
        marked_document[config.LAST_UPDATED] = last_modified

        if config.IF_MATCH:
            resolve_document_etag(marked_document, resource)

        resolve_document_version(marked_document, resource, 'DELETE', original)

        # Update document in database (including version collection if needed)
        id = original[resource_def['id_field']]
        try:
            app.data.replace(resource, id, marked_document, original)
        except app.data.OriginalChangedError:
            # replace lost the race with a concurrent write; only an error
            # when the caller asked for concurrency checking
            if concurrency_check:
                abort(412, description='Client and server etags don\'t match')

        # create previous version if it wasn't already there
        late_versioning_catch(original, resource)
        # and add deleted version
        insert_versioning_documents(resource, marked_document)

        # update oplog if needed
        oplog_push(resource, marked_document, 'DELETE', id)

    else:
        # Delete the document for real

        # media cleanup
        media_fields = app.config['DOMAIN'][resource]['_media']

        # document might miss one or more media fields because of datasource
        # and/or client projection.
        missing_media_fields = [f for f in media_fields if f not in original]
        if len(missing_media_fields):
            # retrieve the whole document so we have all media fields
            # available. Should be a very rare occurrence. We can't get rid
            # of the get_document() call since it also deals with etag
            # matching, which is still needed. Also, this lookup should
            # never fail.
            # TODO not happy with this hack. Not at all. Is there a better way?
            original = app.data.find_one_raw(
                resource, original[resource_def['id_field']])

        for field in media_fields:
            if field in original:
                app.media.delete(original[field], resource)

        id = original[resource_def['id_field']]
        app.data.remove(resource, {resource_def['id_field']: id})

        # TODO: should attempt to delete version collection even if setting is
        # off
        if app.config['DOMAIN'][resource]['versioning'] is True:
            app.data.remove(
                resource + config.VERSIONS,
                {versioned_id_field(resource_def):
                 original[resource_def['id_field']]})

        # update oplog if needed
        oplog_push(resource, original, 'DELETE', id)

    if suppress_callbacks is not True:
        getattr(app, "on_deleted_item")(resource, original)
        getattr(app, "on_deleted_item_%s" % resource)(original)

    return {}, None, None, 204
def deleteitem_internal(resource, concurrency_check=False, **lookup):
    """ Intended for internal delete calls, this method is not rate limited,
    authentication is not checked, pre-request events are not raised, and
    concurrency checking is optional. Deletes a resource item.

    :param resource: name of the resource to which the item(s) belong.
    :param concurrency_check: concurrency check switch (bool)
    :param **lookup: item lookup query.

    :returns: tuple of ({}, None, None, 204) — an empty 204 No Content.

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.
       Push updates to OpLog.
       Original deleteitem() has been split into deleteitem() and
       deleteitem_internal().

    .. versionchanged:: 0.4
       Fix #284: If you have a media field, and set datasource projection to
       0 for that field, the media will not be deleted.
       Support for document versioning.
       'on_delete_item' events raised before performing the delete.
       'on_deleted_item' events raised after performing the delete.

    .. versionchanged:: 0.3
       Delete media files as needed.
       Pass the explicit query filter to the data driver, as it does not
       support the id argument anymore.

    .. versionchanged:: 0.2
       Raise pre_<method> event.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.5
       Pass current resource to ``parse_request``, allowing for proper
       processing of new configuration settings: `filters`, `sorting`,
       `paging`.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.
    """
    original = get_document(resource, concurrency_check, **lookup)
    if not original:
        abort(404)

    # notify callbacks
    getattr(app, "on_delete_item")(resource, original)
    getattr(app, "on_delete_item_%s" % resource)(original)

    # media cleanup
    media_fields = app.config['DOMAIN'][resource]['_media']

    # document might miss one or more media fields because of datasource and/or
    # client projection.
    missing_media_fields = [f for f in media_fields if f not in original]
    if len(missing_media_fields):
        # retrieve the whole document so we have all media fields available.
        # Should be a very rare occurrence. We can't get rid of the
        # get_document() call since it also deals with etag matching, which is
        # still needed. Also, this lookup should never fail.
        # TODO not happy with this hack. Not at all. Is there a better way?
        original = app.data.find_one_raw(resource, original[config.ID_FIELD])

    for field in media_fields:
        if field in original:
            app.media.delete(original[field])

    id = original[config.ID_FIELD]
    app.data.remove(resource, {config.ID_FIELD: id})

    # update oplog if needed
    oplog_push(resource, original, 'DELETE', id)

    # TODO: should attempt to delete version collection even if setting is off
    if app.config['DOMAIN'][resource]['versioning'] is True:
        app.data.remove(
            resource + config.VERSIONS,
            {versioned_id_field(): original[config.ID_FIELD]})

    getattr(app, "on_deleted_item")(resource, original)
    getattr(app, "on_deleted_item_%s" % resource)(original)

    return {}, None, None, 204
def remove_expired(self, doc):
    """
    Removes the expired published article from 'published' collection. Below is the workflow:
        1. If doc is a package then recursively move the items in the package to legal archive if the item
           wasn't moved before. And then run the package through the expiry workflow.
        2. Check if doc has expired. This is needed because when doc is a package and expired but the items
           in the package are not expired. If expired then update allow_post_publish_actions,
           can_be_removed flags.
        3. Insert/update the doc in Legal Archive repository
            (a) All references to master data like users, desks ... are de-normalized before inserting
                into Legal Archive. Same is done to each version of the article.
            (b) Inserts Transmission Details (fetched from publish_queue collection)
        4. If the doc has expired then remove the transmission details from Publish Queue collection.
        5. If the doc has expired and is eligible to be removed from production then remove the article and
           its versions from archive and archive_versions collections respectively.
        6. Removes the item from published collection, if can_be_removed is True

    :param doc: doc in 'published' collection
    """
    log_msg_format = "{{'_id': {item_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                     "'expired_on': {expiry}}}."
    log_msg = log_msg_format.format(**doc)
    version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
    can_be_removed = doc['can_be_removed']

    if not can_be_removed:
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:  # Step 1
            logging.info('Starting the workflow for removal of the expired package ' + log_msg)
            self._handle_expired_package(doc)

        logging.info('Starting the workflow for removal of the expired item ' + log_msg)
        is_expired = doc['expiry'] <= utcnow()

        if is_expired:  # Step 2
            updates = self._update_flags(doc, log_msg)
            doc.update(updates)
            can_be_removed = updates.get('can_be_removed', can_be_removed)

        # Step 3 - upsert into Legal Archive BEFORE anything is deleted from
        # production; returns the publish queue items whose transmission
        # details were archived. (This call was previously commented out and
        # stubbed with an empty list, which skipped the documented Step 3
        # entirely and left transmission details behind in Step 4.)
        publish_queue_items = self._upsert_into_legal_archive(doc, version_id_field, log_msg_format, log_msg)

        if is_expired:  # Step 4
            logging.info('Removing the transmission details for expired item ' + log_msg)
            for publish_queue_item in publish_queue_items:
                get_resource_service('publish_queue').delete_action(
                    lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

        if is_expired and self.can_remove_from_production(doc):  # Step 5
            logging.info('Removing the expired item from production ' + log_msg)
            # remove every version up to and including the published one
            lookup = {'$and': [{version_id_field: doc['item_id']},
                               {config.VERSION: {'$lte': doc[config.VERSION]}}]}
            get_resource_service('archive_versions').delete(lookup)
            get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

    if can_be_removed:  # Step 6
        logging.info('Removing the expired item from published collection ' + log_msg)
        self.delete_by_article_id(_id=doc['item_id'], doc=doc)

    logging.info('Completed the workflow for removing the expired publish item ' + log_msg)
def deleteitem_internal(resource, concurrency_check=False, suppress_callbacks=False, original=None, **lookup):
    """ Intended for internal delete calls, this method is not rate limited,
    authentication is not checked, pre-request events are not raised, and
    concurrency checking is optional. Deletes a resource item.

    When the resource has ``soft_delete`` enabled the document is replaced by
    a copy flagged with ``config.DELETED`` instead of being removed; otherwise
    the document, its media files and its version history are removed for real.

    :param resource: name of the resource to which the item(s) belong.
    :param concurrency_check: concurrency check switch (bool)
    :param suppress_callbacks: when truthy, skip on_delete_item/on_deleted_item
                               event hooks.
    :param original: original document if already fetched from the database
    :param **lookup: item lookup query.

    :returns: the framework's standard empty 204 response (``all_done()``).

    .. versionchanged:: 0.6
       Support for soft delete.

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.
       Push updates to OpLog.
       Original deleteitem() has been split into deleteitem() and
       deleteitem_internal().

    .. versionchanged:: 0.4
       Fix #284: If you have a media field, and set datasource projection to
       0 for that field, the media will not be deleted.
       Support for document versioning.
       'on_delete_item' events raised before performing the delete.
       'on_deleted_item' events raised after performing the delete.

    .. versionchanged:: 0.3
       Delete media files as needed.
       Pass the explicit query filter to the data driver, as it does not
       support the id argument anymore.

    .. versionchanged:: 0.2
       Raise pre_<method> event.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.5
       Pass current resource to ``parse_request``, allowing for proper
       processing of new configuration settings: `filters`, `sorting`,
       `paging`.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.
    """
    resource_def = config.DOMAIN[resource]
    soft_delete_enabled = resource_def["soft_delete"]
    original = get_document(resource, concurrency_check, original,
                            force_auth_field_projection=soft_delete_enabled, **lookup)
    # a soft-deleted document must look like it no longer exists
    if not original or (soft_delete_enabled and original.get(config.DELETED) is True):
        return all_done()

    # notify callbacks
    if not suppress_callbacks:
        getattr(app, "on_delete_item")(resource, original)
        getattr(app, "on_delete_item_%s" % resource)(original)

    if soft_delete_enabled:
        # Instead of removing the document from the db, just mark it as deleted
        marked_document = copy.deepcopy(original)

        # Set DELETED flag and update metadata
        last_modified = datetime.utcnow().replace(microsecond=0)
        marked_document[config.DELETED] = True
        marked_document[config.LAST_UPDATED] = last_modified

        if config.IF_MATCH:
            resolve_document_etag(marked_document, resource)

        resolve_document_version(marked_document, resource, "DELETE", original)

        # Update document in database (including version collection if needed)
        id = original[resource_def["id_field"]]
        try:
            app.data.replace(resource, id, marked_document, original)
        except app.data.OriginalChangedError:
            # replace lost the race with a concurrent write; only an error
            # when the caller asked for concurrency checking
            if concurrency_check:
                abort(412, description="Client and server etags don't match")

        # create previous version if it wasn't already there
        late_versioning_catch(original, resource)
        # and add deleted version
        insert_versioning_documents(resource, marked_document)

        # update oplog if needed
        oplog_push(resource, marked_document, "DELETE", id)

    else:
        # Delete the document for real

        # media cleanup
        media_fields = app.config["DOMAIN"][resource]["_media"]

        # document might miss one or more media fields because of datasource
        # and/or client projection.
        missing_media_fields = [f for f in media_fields if f not in original]
        if missing_media_fields:
            # retrieve the whole document so we have all media fields
            # available. Should be a very rare occurrence. We can't get rid
            # of the get_document() call since it also deals with etag
            # matching, which is still needed. Also, this lookup should
            # never fail.
            # TODO not happy with this hack. Not at all. Is there a better way?
            original = app.data.find_one_raw(resource, **lookup)

        for field in media_fields:
            if field in original:
                # a media field may hold either a single file id or a list
                media_field = original[field]
                if isinstance(media_field, list):
                    for file_id in media_field:
                        app.media.delete(file_id, resource)
                else:
                    app.media.delete(original[field], resource)

        id = original[resource_def["id_field"]]
        app.data.remove(resource, lookup)

        # TODO: should attempt to delete version collection even if setting is
        # off
        if app.config["DOMAIN"][resource]["versioning"] is True:
            app.data.remove(
                resource + config.VERSIONS,
                {
                    versioned_id_field(resource_def): original[resource_def["id_field"]]
                },
            )

        # update oplog if needed
        oplog_push(resource, original, "DELETE", id)

    if not suppress_callbacks:
        getattr(app, "on_deleted_item")(resource, original)
        getattr(app, "on_deleted_item_%s" % resource)(original)

    return all_done()
def upsert_into_legal_archive(self, doc):
    """Upsert an article and its version history into the Legal Archive.

    Once publish actions are performed on the article do the below:
        1. Get legal archive article (to decide between insert and replace).
        2. De-normalize the expired article (users, desks, ...).
        3. Upsert into Legal Archive.
        4. Get version history, de-normalize the versions not yet in the
           Legal Archive and insert them.

    Articles that were never published/corrected/killed are skipped.

    :param dict doc: doc from 'archive' collection.
    """
    if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
        # Not an exception context: logger.error, not logger.exception.
        logger.error('Invalid state: {}. Cannot move the item to legal archive. item: {}'.
                     format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc)))
        return

    # required for behave test.
    legal_archive_doc = deepcopy(doc)
    legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
    legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)

    log_msg = self.log_msg_format.format(**legal_archive_doc)
    version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
    logger.info('Preparing Article to be inserted into Legal Archive ' + log_msg)

    # Removing Irrelevant properties: etag is re-computed on insert and
    # lock state has no meaning in the legal archive.
    legal_archive_doc.pop(config.ETAG, None)
    legal_archive_doc.pop('lock_user', None)
    legal_archive_doc.pop('lock_session', None)
    legal_archive_doc.pop('lock_time', None)
    logger.info('Removed irrelevant properties from the article ' + log_msg)

    # Step 1
    article_in_legal_archive = legal_archive_service.find_one(req=None, _id=legal_archive_doc[config.ID_FIELD])

    # Step 2 - De-normalizing the legal archive doc
    self._denormalize_user_desk(legal_archive_doc, log_msg)
    logger.info('De-normalized article ' + log_msg)

    # Step 3 - Upserting Legal Archive
    logger.info('Upserting Legal Archive Repo with article ' + log_msg)
    if article_in_legal_archive:
        legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
    else:
        legal_archive_service.post([legal_archive_doc])

    # Step 4 - Insert only the versions that are missing from the Legal Archive.
    lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
    version_history = list(get_resource_service('archive_versions').get(req=None, lookup=lookup))
    legal_version_history = list(legal_archive_versions_service.get(req=None, lookup=lookup))
    logger.info('Fetched version history for article ' + log_msg)

    # Set-based membership test instead of a quadratic any() scan.
    existing_versions = {legal_version[config.VERSION] for legal_version in legal_version_history}
    versions_to_insert = [version for version in version_history
                          if version[config.VERSION] not in existing_versions]

    for version_doc in versions_to_insert:
        self._denormalize_user_desk(version_doc,
                                    self.log_msg_format.format(_id=version_doc[version_id_field],
                                                               unique_name=version_doc['unique_name'],
                                                               _current_version=version_doc[config.VERSION],
                                                               expiry=version_doc['expiry']))
        # pop() with default: a version fetched without projection may lack the etag.
        version_doc.pop(config.ETAG, None)

    if versions_to_insert:
        legal_archive_versions_service.post(versions_to_insert)
        logger.info('Inserted de-normalized version history for article ' + log_msg)

    logger.info('Upsert completed for article ' + log_msg)
def __init_article_versions(self):
    """Build the fixture version documents (versions 1-4) of article '8'.

    All four versions share the same static metadata; only the version
    number, workflow state and a few late-added fields (headline, ednote,
    body_html) differ, so the common part is built once by a local helper.

    :return: list of four version docs ordered by version number
    """
    def make_version(version, state, **extra_fields):
        # One version doc: shared fields plus the per-version overrides.
        version_doc = {
            'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
            versioned_id_field(): '8',
            'type': 'text',
            config.VERSION: version,
            'urgency': 4,
            'pubstatus': 'usable',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'dateline': 'Sydney',
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                        {'qcode': '04001002', 'name': 'Weather'}],
            'state': state,
            'expiry': utcnow() + timedelta(minutes=20),
            'unique_name': '#8',
        }
        version_doc.update(extra_fields)
        return version_doc

    return [
        make_version(1, 'draft'),
        make_version(2, 'submitted',
                     headline='Two students missing'),
        make_version(3, 'in_progress',
                     headline='Two students missing',
                     ednote='Andrew Marwood contributed to this article'),
        make_version(4, 'in_progress',
                     headline='Two students missing',
                     ednote='Andrew Marwood contributed to this article',
                     body_html='Test body'),
    ]
def upsert_into_legal_archive(self, item_id):
    """Once publish actions are performed on the article do the below:

    1. Get legal archive article.
    2. De-normalize the expired article
    3. Upserting Legal Archive.
    4. Get Version History and De-normalize and Inserting Legal Archive Versions
    5. Get History and de-normalize and insert into Legal Archive History

    :param item_id: id of the document from 'archive' collection.
    :raises Exception: re-raises whatever the import fails with, after logging.
    """
    try:
        logger.info("Import item into legal {}.".format(item_id))

        doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)
        if not doc:
            logger.error("Could not find the document {} to import to legal archive.".format(item_id))
            return

        # Setting default values in case they are missing, otherwise log messages will fail.
        doc.setdefault("unique_name", "NO UNIQUE NAME")
        doc.setdefault(config.VERSION, 1)
        doc.setdefault("expiry", utcnow())

        if doc.get(ITEM_STATE) not in PUBLISH_STATES:
            # at times we have seen that item is published but the item is different in the archive collection
            # this will notify admins about the issue but proceed to move the item into legal archive.
            msg = "Invalid state: {}. Moving the item to legal archive. item: {}".format(
                doc.get(ITEM_STATE), self.log_msg_format.format(**doc))
            logger.error(msg)
            update_notifiers(ACTIVITY_ERROR, msg=msg, resource=ARCHIVE)

        # required for behave test.
        legal_archive_doc = deepcopy(doc)
        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
        legal_archive_history_service = get_resource_service(LEGAL_ARCHIVE_HISTORY_NAME)

        log_msg = self.log_msg_format.format(**legal_archive_doc)
        version_id_field = versioned_id_field(app.config["DOMAIN"][ARCHIVE])
        logger.info("Preparing Article to be inserted into Legal Archive " + log_msg)

        # Removing irrelevant properties: etag is re-computed on insert and
        # lock state has no meaning in the legal archive.
        legal_archive_doc.pop(config.ETAG, None)
        legal_archive_doc.pop("lock_user", None)
        legal_archive_doc.pop("lock_session", None)
        legal_archive_doc.pop("lock_time", None)
        legal_archive_doc.pop("lock_action", None)
        logger.info("Removed irrelevant properties from the article {}".format(log_msg))

        # Step 1
        article_in_legal_archive = legal_archive_service.find_one(
            req=None, _id=legal_archive_doc[config.ID_FIELD])

        if article_in_legal_archive and \
                article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(config.VERSION):
            # Legal archive is already ahead of this doc: nothing to import.
            logger.info(
                "Item {} version: {} already in legal archive. Legal Archive document version {}".format(
                    legal_archive_doc.get(config.ID_FIELD),
                    legal_archive_doc.get(config.VERSION),
                    article_in_legal_archive.get(config.VERSION)))
            self._set_moved_to_legal(doc)
            return

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc, log_msg)
        logger.info("De-normalized article {}".format(log_msg))

        # Step 3 - Upserting Legal Archive
        logger.info("Upserting Legal Archive Repo with article {}".format(log_msg))
        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 4 - Insert only the versions that are not yet in the Legal Archive.
        lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
        versions = list(get_resource_service("archive_versions").get(req=None, lookup=lookup))
        legal_versions = list(legal_archive_versions_service.get(req=None, lookup=lookup))
        logger.info("Fetched version history for article {}".format(log_msg))

        # Set-based membership test instead of a quadratic any() scan.
        existing_versions = {legal_version[config.VERSION] for legal_version in legal_versions}
        versions_to_insert = [version for version in versions
                              if version[config.VERSION] not in existing_versions]

        # Step 5 - Insert only the history records that are not yet in the Legal Archive.
        lookup = {"item_id": legal_archive_doc[config.ID_FIELD]}
        history_items = list(get_resource_service("archive_history").get(req=None, lookup=lookup))
        legal_history_items = list(legal_archive_history_service.get(req=None, lookup=lookup))
        logger.info("Fetched history for article {}".format(log_msg))

        existing_history_ids = {legal_history[config.ID_FIELD] for legal_history in legal_history_items}
        history_to_insert = [history for history in history_items
                             if history[config.ID_FIELD] not in existing_history_ids]

        # This happens when user kills an article from Dusty Archive
        if (article_in_legal_archive
                and article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION]
                and len(versions_to_insert) == 0):
            resource_def = app.config["DOMAIN"][ARCHIVE]
            versioned_doc = deepcopy(legal_archive_doc)
            versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
            versioned_doc[config.ID_FIELD] = ObjectId()
            versions_to_insert.append(versioned_doc)

        for version_doc in versions_to_insert:
            self._denormalize_user_desk(
                version_doc,
                self.log_msg_format.format(
                    _id=version_doc[version_id_field],
                    unique_name=version_doc.get("unique_name"),
                    _current_version=version_doc[config.VERSION],
                    expiry=version_doc.get("expiry"),
                ),
            )
            version_doc.pop(config.ETAG, None)

        if versions_to_insert:
            legal_archive_versions_service.post(versions_to_insert)
            logger.info("Inserted de-normalized versions for article {}".format(log_msg))

        for history_doc in history_to_insert:
            self._denormalize_history(history_doc)
            history_doc.pop(config.ETAG, None)

        if history_to_insert:
            legal_archive_history_service.post(history_to_insert)
            logger.info("Inserted de-normalized history for article {}".format(log_msg))

        # Set the flag that item is moved to legal.
        self._set_moved_to_legal(doc)
        logger.info("Upsert completed for article " + log_msg)
    except Exception:
        logger.exception("Failed to import into legal archive {}.".format(item_id))
        raise
def upsert_into_legal_archive(self, doc):
    """Upsert an article and its version history into the Legal Archive.

    Once publish actions are performed on the article do the below:
        1. Get legal archive article (to decide between insert and replace).
        2. De-normalize the expired article.
        3. Upserting Legal Archive.
        4. Get Version History, de-normalize the versions not yet in Legal
           Archive and insert them.

    Articles that were never published/corrected/killed are skipped.

    :param dict doc: doc from 'archive' collection.
    """
    if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
        logger.error(
            'Invalid state: {}. Cannot move the item to legal archive. item: {}'
            .format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc)))
        return

    # required for behave test.
    legal_archive_doc = deepcopy(doc)
    legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
    legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)

    log_msg = self.log_msg_format.format(**legal_archive_doc)
    version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
    logger.info('Preparing Article to be inserted into Legal Archive ' + log_msg)

    # Removing Irrelevant properties: etag is re-computed on insert and
    # lock state has no meaning in the legal archive.
    legal_archive_doc.pop(config.ETAG, None)
    legal_archive_doc.pop('lock_user', None)
    legal_archive_doc.pop('lock_session', None)
    legal_archive_doc.pop('lock_time', None)
    logger.info('Removed irrelevant properties from the article {}'.format(log_msg))

    # Step 1
    article_in_legal_archive = legal_archive_service.find_one(
        req=None, _id=legal_archive_doc[config.ID_FIELD])

    # Step 2 - De-normalizing the legal archive doc
    self._denormalize_user_desk(legal_archive_doc, log_msg)
    logger.info('De-normalized article {}'.format(log_msg))

    # Step 3 - Upserting Legal Archive
    logger.info('Upserting Legal Archive Repo with article {}'.format(log_msg))
    if article_in_legal_archive:
        legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
    else:
        legal_archive_service.post([legal_archive_doc])

    # Step 4 - Insert only the versions that are not yet in the Legal Archive.
    lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
    version_history = list(get_resource_service('archive_versions').get(req=None, lookup=lookup))
    legal_version_history = list(legal_archive_versions_service.get(req=None, lookup=lookup))
    logger.info('Fetched version history for article {}'.format(log_msg))

    # Set-based membership test instead of a quadratic any() scan.
    existing_versions = {legal_version[config.VERSION] for legal_version in legal_version_history}
    versions_to_insert = [version for version in version_history
                          if version[config.VERSION] not in existing_versions]

    # This happens when user kills an article from Dusty Archive
    if article_in_legal_archive and article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION] \
            and len(versions_to_insert) == 0:
        resource_def = app.config['DOMAIN'][ARCHIVE]
        versioned_doc = deepcopy(legal_archive_doc)
        versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
        versioned_doc[config.ID_FIELD] = ObjectId()
        versions_to_insert.append(versioned_doc)

    for version_doc in versions_to_insert:
        self._denormalize_user_desk(
            version_doc,
            self.log_msg_format.format(
                _id=version_doc[version_id_field],
                unique_name=version_doc.get('unique_name'),
                _current_version=version_doc[config.VERSION],
                expiry=version_doc.get('expiry')))
        version_doc.pop(config.ETAG, None)

    if versions_to_insert:
        legal_archive_versions_service.post(versions_to_insert)
        logger.info('Inserted de-normalized version history for article {}'.format(log_msg))

    logger.info('Upsert completed for article {}'.format(log_msg))
def test_remove_expired_published_and_killed_content(self):
    """Publish then kill an article and verify the expiry job at each stage.

    First run: only the published revision has expired, so it must be moved
    to the legal archive while the killed item stays in production. Second
    run: the killed revision has expired too, so everything is purged from
    production and the legal archive reflects the kill.

    Note: uses assertEqual (the assertEquals alias is deprecated and removed
    in Python 3.12).
    """
    cmd = ValidatorsPopulateCommand()
    with self.app.app_context():
        cmd.run(self.filename)
        self.app.data.insert('archive_versions', self.article_versions)

        published_service = get_resource_service('published')
        text_archive = get_resource_service('text_archive')

        # Publishing an Article
        doc = self.articles[0]
        original = doc.copy()

        get_resource_service('archive_publish').queue_transmission(original)
        published_service.post([original])

        published_items = published_service.get_other_published_items(original['item_id'])
        self.assertEqual(1, published_items.count())

        # Setting the expiry date of the published article to 1 hr back from now
        published_service.update_published_items(
            original['item_id'], 'expiry', utcnow() + timedelta(minutes=-60))

        # Killing the published article and manually inserting the version of the article as unittests use
        # service directly
        _current_version = doc[config.VERSION] + 1
        get_resource_service('archive_kill').patch(id=doc['_id'],
                                                   updates={config.VERSION: _current_version})
        killed_version = {
            'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
            versioned_id_field(): '1', 'type': 'text', config.VERSION: _current_version,
            'body_html': 'Test body', 'destination_groups': ['4'], 'urgency': 4,
            'headline': 'Two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(),
            'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article',
            'dateline': 'Sydney', 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                        {'qcode': '04001002', 'name': 'Weather'}],
            'state': 'published', 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#2'
        }
        self.app.data.insert('archive_versions', [killed_version])

        # Executing the Expiry Job for the Published Article and asserting the collections
        RemoveExpiredPublishContent().run()

        articles_in_text_archive = text_archive.get(req=None, lookup={'item_id': original['item_id']})
        self.assertEqual(articles_in_text_archive.count(), 0)

        published_items = published_service.get_other_published_items(str(original['item_id']))
        self.assertEqual(1, published_items.count())

        # killed item is not expired yet, so it must still be in production
        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original['item_id'])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production['state'], 'killed')
        self.assertEqual(article_in_production[config.VERSION], _current_version)

        # Validate the collections in Legal Archive
        article_in_legal_archive, article_versions_in_legal_archive, formatted_items, queue_items = \
            self.__get_legal_archive_details(original['item_id'])

        self.assertIsNotNone(article_in_legal_archive, 'Article cannot be none in Legal Archive')
        self.assertEqual(article_in_legal_archive['state'], 'published')

        self.assertIsNotNone(article_versions_in_legal_archive,
                             'Article Versions cannot be none in Legal Archive')
        self.assertEqual(article_versions_in_legal_archive.count(), 4)

        self.assertGreaterEqual(formatted_items.count(), 1,
                                'Formatted Items must be greater than or equal to 1')
        for formatted_item in formatted_items:
            self.assertEqual(formatted_item['item_id'], original['item_id'])
            self.assertEqual(formatted_item['item_version'], self.articles[0][config.VERSION])

        self.assertGreaterEqual(queue_items.count(), 1,
                                'Publish Queue Items must be greater than or equal to 1')

        # Setting the expiry date of the killed article to 1 hr back from now and running the job again
        published_service.update_published_items(
            original['item_id'], 'expiry', utcnow() + timedelta(minutes=-60))
        RemoveExpiredPublishContent().run()

        articles_in_text_archive = text_archive.get(req=None, lookup={'item_id': original['item_id']})
        self.assertEqual(articles_in_text_archive.count(), 0)

        published_items = published_service.get_other_published_items(str(original['item_id']))
        self.assertEqual(0, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original['item_id'])
        self.assertIsNone(article_in_production)

        # Validate the collections in Legal Archive
        article_in_legal_archive, article_versions_in_legal_archive, formatted_items, queue_items = \
            self.__get_legal_archive_details(original['item_id'], article_version=_current_version,
                                             publishing_action='killed')

        self.assertIsNotNone(article_in_legal_archive, 'Article cannot be none in Legal Archive')
        self.assertEqual(article_in_legal_archive['state'], 'killed')

        self.assertIsNotNone(article_versions_in_legal_archive,
                             'Article Versions cannot be none in Legal Archive')
        self.assertEqual(article_versions_in_legal_archive.count(), 5)

        self.assertGreaterEqual(formatted_items.count(), 1,
                                'Formatted Items must be greater than or equal to 1')
        for formatted_item in formatted_items:
            self.assertEqual(formatted_item['item_id'], original['item_id'])
            self.assertEqual(formatted_item['item_version'], _current_version)

        self.assertGreaterEqual(queue_items.count(), 1,
                                'Publish Queue Items must be greater than or equal to 1')
def getitem_internal(resource, **lookup):
    """Fetch and serve a single document (with optional version handling).

    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.6
       Handle soft deleted documents

    .. versionchanged:: 0.5
       Allow ``?version=all`` requests to fire ``on_fetched_*`` events.
       Create pagination links for document versions. (#475)
       Pagination links reflect current query. (#464)

    .. versionchanged:: 0.4
       HATOEAS link for contains the business unit value even when regexes
       have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` changed to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embeded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged: 0.0.8
       'on_getting_item' event is raised when a document has been read from
       the database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
       ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they
       were created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superflous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    soft_delete_enabled = config.DOMAIN[resource]['soft_delete']
    if soft_delete_enabled:
        # GET requests should always fetch soft deleted documents from the db
        # They are handled and included in 404 responses below.
        req.show_deleted = True

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None
    cursor = None

    # calculate last_modified before get_old_document rolls back the document,
    # allowing us to invalidate the cache when _latest_version changes
    last_modified = last_updated(document)

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = document
        document = get_old_document(
            resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)
    if config.IF_MATCH:
        etag = document[config.ETAG]

        if resource_def['versioning'] is True:
            # In order to keep the LATEST_VERSION field up to date in client
            # caches, changes to the latest version should invalidate cached
            # copies of previous verisons. Incorporate the latest version into
            # versioned document ETags on the fly to ensure 'If-None-Match'
            # comparisons support this caching behavior.
            etag += str(document[config.LATEST_VERSION])

    # check embedded fields resolved in build_response_document() for more
    # recent last updated timestamps. We don't want to respond 304 if embedded
    # fields have changed
    for field in embedded_fields:
        embedded_document = document.get(field)
        if isinstance(embedded_document, dict):
            embedded_last_updated = last_updated(embedded_document)
            if embedded_last_updated > last_modified:
                last_modified = embedded_last_updated

    # facilitate client caching by returning a 304 when appropriate
    cache_validators = {True: 0, False: 0}
    if req.if_modified_since:
        cache_valid = (last_modified <= req.if_modified_since)
        cache_validators[cache_valid] += 1
    if req.if_none_match:
        cache_valid = (etag == req.if_none_match)
        cache_validators[cache_valid] += 1
    # If all cache validators are true, return 304
    if (cache_validators[True] > 0) and (cache_validators[False] == 0):
        return {}, last_modified, etag, 304

    if version in ('all', 'diffs'):
        # find all versions
        lookup[versioned_id_field(resource_def)] \
            = lookup[resource_def['id_field']]
        del lookup[resource_def['id_field']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(diff_document(
                            resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    elif soft_delete_enabled and document.get(config.DELETED) is True:
        # This document was soft deleted. Respond with 404 and the deleted
        # version of the document.
        # BUGFIX: a stray trailing comma previously turned the status value
        # into a 1-tuple (config.STATUS_ERR,) instead of the status string.
        document[config.STATUS] = config.STATUS_ERR
        document[config.ERROR] = {
            'code': 404,
            'message': 'The requested URL was not found on this server.'
        }
        return document, last_modified, etag, 404
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        # use the id of the latest document for multi-document requests
        if cursor:
            count = cursor.count(with_limit_and_skip=False)
            response[config.LINKS] = \
                _pagination_links(resource, req, count,
                                  latest_doc[resource_def['id_field']])
            if config.DOMAIN[resource]['pagination']:
                response[config.META] = _meta_links(req, count)
        else:
            response[config.LINKS] = \
                _pagination_links(resource, req, None,
                                  response[resource_def['id_field']])

    # callbacks not supported on version diffs because of partial documents
    if version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should
        # the functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        if resource_def['versioning'] is True and version == 'all':
            versions = response
            if config.DOMAIN[resource]['hateoas']:
                versions = response[config.ITEMS]
            for version_item in versions:
                getattr(app, "on_fetched_item")(resource, version_item)
                getattr(app, "on_fetched_item_%s" % resource)(version_item)
        else:
            getattr(app, "on_fetched_item")(resource, response)
            getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
def _upsert_into_legal_archive(self, doc):
    """ For the expired published article represented by doc, do the below:
        1. Fetch version history of article so that version_history_doc[config.VERSION] <= doc[config.VERSION].
        2. De-normalize the expired article and each version of the article
        3. Fetch Transmission Details so that queued_item['item_version'] == doc[config.VERSION]
        4. De-normalize the Transmission Details
        5. An article can be published more than one time before it's removed from production database, so it's
           important to check if the article already exists in Legal Archive DB. If it exists then replace the
           article in Legal Archive DB, otherwise create.
        6. Create the Version History of the article in Legal Archive DB.
        7. Create the Transmission Details in Legal Archive DB.

    :param: doc - expired doc from 'published' collection.
    :return: transmission details (queue items)
    :raises AssertionError: when no transmission details exist for the item.
    """
    legal_archive_doc = doc.copy()
    logging.info('Preparing Article to be inserted into Legal Archive %s' % legal_archive_doc.get('unique_name'))

    # Removing Irrelevant properties: in legal archive the article is keyed
    # by its archive id rather than the published collection's id.
    legal_archive_doc[config.ID_FIELD] = legal_archive_doc['item_id']
    # pop() with default instead of del: doc may have been fetched without an etag
    legal_archive_doc.pop(config.ETAG, None)
    del legal_archive_doc['item_id']

    logging.info('Removed Irrelevant properties from the article %s' % legal_archive_doc.get('unique_name'))

    # Step 3 - Fetch Publish Queue Items
    lookup = {'item_id': legal_archive_doc[config.ID_FIELD],
              'item_version': legal_archive_doc[config.VERSION]}
    queue_items = list(get_resource_service('publish_queue').get(req=None, lookup=lookup))

    if not queue_items:
        # raise explicitly rather than assert: asserts are stripped under "python -O"
        raise AssertionError(
            "Transmission Details are empty for published item %s" % legal_archive_doc[config.ID_FIELD])
    logging.info('Fetched transmission details for article %s' % legal_archive_doc.get('unique_name'))

    # Step 4 - replace subscriber ids with subscriber names in the queue items
    subscriber_ids = list({str(queue_item['subscriber_id']) for queue_item in queue_items})
    query = {'$and': [{config.ID_FIELD: {'$in': subscriber_ids}}]}
    subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))
    subscribers = {str(subscriber[config.ID_FIELD]): subscriber for subscriber in subscribers}

    for queue_item in queue_items:
        queue_item.pop(config.ETAG, None)
        queue_item['subscriber_id'] = subscribers[str(queue_item['subscriber_id'])]['name']

    logging.info(
        'De-normalized the Transmission Detail records of article %s' % legal_archive_doc.get('unique_name'))

    # Step 2 - De-normalizing the legal archive doc
    self._denormalize_user_desk(legal_archive_doc)

    # Step 1 - Get Version History (only versions up to the published version)
    req = ParsedRequest()
    req.sort = '[("%s", 1)]' % config.VERSION
    resource_def = app.config['DOMAIN']['archive']
    version_id = versioned_id_field(resource_def)
    lookup = {'$and': [{version_id: legal_archive_doc[config.ID_FIELD]},
                       {config.VERSION: {'$lte': legal_archive_doc[config.VERSION]}}]}

    version_history = list(get_resource_service('archive_versions').get(req=req, lookup=lookup))
    legal_archive_doc_versions = []
    for versioned_doc in version_history:
        self._denormalize_user_desk(versioned_doc)
        versioned_doc.pop(config.ETAG, None)
        legal_archive_doc_versions.append(versioned_doc)

    logging.info('Fetched version history for article %s' % legal_archive_doc.get('unique_name'))

    legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
    legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
    legal_publish_queue_service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)

    # Step 5 - Upserting Legal Archive
    logging.info('Upserting Legal Archive Repo with article %s' % legal_archive_doc.get('unique_name'))

    article_in_legal_archive = legal_archive_service.find_one(_id=legal_archive_doc[config.ID_FIELD],
                                                              req=ParsedRequest())
    if article_in_legal_archive:
        legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
    else:
        legal_archive_service.post([legal_archive_doc])

    # Step 6
    if legal_archive_doc_versions:
        legal_archive_versions_service.post(legal_archive_doc_versions)

    # Step 7
    legal_publish_queue_service.post(queue_items)

    logging.info('Upsert completed for article %s' % legal_archive_doc.get('unique_name'))

    return queue_items