Example #1
    def remove_expired(self, doc):
        """
        Removes the expired published article from the 'published' collection. The workflow is:
            1.  If doc is a package, recursively move the items in the package to the legal archive
                if they weren't moved before, then run the package through the expiry workflow.
            2.  Check if doc has expired. This is needed because a package may have expired while the
                items in it have not. If expired, update the allow_post_publish_actions and
                can_be_removed flags.
            3.  Insert/update the doc in the Legal Archive repository:
                (a) All references to master data like users, desks etc. are de-normalized before
                    inserting into the Legal Archive. The same is done to each version of the article.
                (b) Inserts Transmission Details (fetched from the publish_queue collection)
            4.  If the doc has expired, remove the transmission details from the Publish Queue collection.
            5.  If the doc has expired and is eligible to be removed from production, remove the article
                and its versions from the archive and archive_versions collections respectively.
            6.  Remove the item from the published collection if can_be_removed is True.

        :param doc: doc in 'published' collection
        """

        log_msg_format = "{{'_id': {item_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                         "'expired_on': {expiry}}}."
        log_msg = log_msg_format.format(**doc)

        version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
        can_be_removed = doc['can_be_removed']

        if not can_be_removed:
            if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:  # Step 1
                logging.info('Starting the workflow for removal of the expired package ' + log_msg)
                self._handle_expired_package(doc)

            logging.info('Starting the workflow for removal of the expired item ' + log_msg)
            is_expired = doc['expiry'] <= utcnow()

            if is_expired:  # Step 2
                updates = self._update_flags(doc, log_msg)
                doc.update(updates)
                can_be_removed = updates.get('can_be_removed', can_be_removed)

            # Step 3
            publish_queue_items = self._upsert_into_legal_archive(doc, version_id_field, log_msg_format, log_msg)
            if is_expired:  # Step 4
                logging.info('Removing the transmission details for expired item ' + log_msg)
                for publish_queue_item in publish_queue_items:
                    get_resource_service('publish_queue').delete_action(
                        lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

            if is_expired and self.can_remove_from_production(doc):  # Step 5
                logging.info('Removing the expired item from production ' + log_msg)
                lookup = {'$and': [{version_id_field: doc['item_id']},
                                   {config.VERSION: {'$lte': doc[config.VERSION]}}]}
                get_resource_service('archive_versions').delete(lookup)

                get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

        if can_be_removed:  # Step 6
            logging.info('Removing the expired item from published collection ' + log_msg)
            self.delete_by_article_id(_id=doc['item_id'], doc=doc)

        logging.info('Completed the workflow for removing the expired publish item ' + log_msg)
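Every snippet on this page hinges on versioned_id_field. Under Eve's default settings (ID_FIELD '_id', VERSION_ID_SUFFIX '_document') the helper simply suffixes the resource's id field, which is why '_id_document' shows up literally in some of the examples below. A minimal sketch of that behaviour, assuming those defaults rather than quoting Eve's actual implementation:

def versioned_id_field_sketch(resource_def=None):
    # Eve reads the id field from the resource definition and appends
    # app.config['VERSION_ID_SUFFIX']; '_id' and '_document' are the defaults.
    id_field = (resource_def or {}).get('id_field', '_id')
    return id_field + '_document'

assert versioned_id_field_sketch() == '_id_document'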
Example #2
    def _duplicate_versions(self, old_id, new_doc):
        """
        Duplicates the version history of the article identified by old_id. Each version's
        identifiers are changed to match those of new_doc.

        :param old_id: identifier used to fetch the version history
        :param new_doc: identifiers from this doc are used to create the version history for the duplicated item.
        """
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        old_versions = get_resource_service('archive_versions').get(req=None, lookup={'guid': old_id})

        new_versions = []
        for old_version in old_versions:
            old_version[version_id] = new_doc[config.ID_FIELD]
            del old_version[config.ID_FIELD]

            old_version['guid'] = new_doc['guid']
            old_version['unique_name'] = new_doc['unique_name']
            old_version['unique_id'] = new_doc['unique_id']
            old_version['versioncreated'] = utcnow()
            if old_version[VERSION] == new_doc[VERSION]:
                old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
            new_versions.append(old_version)
        last_version = deepcopy(new_doc)
        last_version['_id_document'] = new_doc['_id']
        del last_version['_id']
        new_versions.append(last_version)
        if new_versions:
            get_resource_service('archive_versions').post(new_versions)
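To make the loop above concrete, here is a toy illustration (the ids are made up) of how a single version record is re-pointed at the duplicate: the row keeps its content, takes the new document's identifiers, and loses its own Mongo _id so the versions collection assigns a fresh one on insert.

old_version = {'_id': 'row-1', '_id_document': 'old-id', 'guid': 'old-guid', 'body_html': '<p>text</p>'}
new_doc = {'_id': 'new-id', 'guid': 'new-guid'}

old_version['_id_document'] = new_doc['_id']  # re-point the row at the duplicate
del old_version['_id']                        # Mongo will assign a fresh row id
old_version['guid'] = new_doc['guid']

assert old_version == {'_id_document': 'new-id', 'guid': 'new-guid', 'body_html': '<p>text</p>'}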
Example #3
    def _duplicate_versions(self, old_id, new_doc):
        """
        Duplicates the version history of the article identified by old_id. Each version's
        identifiers are changed to match those of new_doc.

        :param old_id: identifier used to fetch the version history
        :param new_doc: identifiers from this doc are used to create the version history for the duplicated item.
        """

        old_versions = get_resource_service("archive_versions").get(req=None, lookup={"guid": old_id})

        new_versions = []
        for old_version in old_versions:
            old_version[versioned_id_field()] = new_doc[config.ID_FIELD]
            del old_version[config.ID_FIELD]

            old_version["guid"] = new_doc["guid"]
            old_version["unique_name"] = new_doc["unique_name"]
            old_version["unique_id"] = new_doc["unique_id"]
            old_version["versioncreated"] = utcnow()
            if old_version[VERSION] == new_doc[VERSION]:
                old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
            new_versions.append(old_version)
        last_version = deepcopy(new_doc)
        last_version["_id_document"] = new_doc["_id"]
        del last_version["_id"]
        new_versions.append(last_version)
        if new_versions:
            get_resource_service("archive_versions").post(new_versions)
Example #4
 def delete_by_article_ids(self, ids):
     """
     Removes the content.
     :param list ids: list of ids to be removed
     """
     version_field = versioned_id_field(app.config['DOMAIN']['archive_versions'])
     get_resource_service('archive_versions').delete(lookup={version_field: {'$in': ids}})
     super().delete_action({config.ID_FIELD: {'$in': ids}})
Example #5
    def remove_expired(self, doc):
        """
        Removes the article from production if the state is spiked
        """

        assert doc[ITEM_STATE] == CONTENT_STATE.SPIKED, \
            "Article state is %s. Only Spiked Articles can be removed" % doc[ITEM_STATE]

        doc_id = str(doc[config.ID_FIELD])
        super().delete_action({config.ID_FIELD: doc_id})
        get_resource_service('archive_versions').delete(lookup={versioned_id_field(): doc_id})
Example #6
    def get(self, req, lookup):
        resource_def = app.config['DOMAIN']['items']
        id_field = versioned_id_field(resource_def)

        lookup = {'$and': [lookup, {'pubstatus': {'$ne': 'canceled'}}]}
        version_history = list(super().get_from_mongo(req=req, lookup=lookup))

        for doc in version_history:
            doc[config.ID_FIELD] = doc[id_field]

        return ListCursor(version_history)
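The loop above exists because each row in a versions collection carries its own Mongo _id plus a pointer back to the parent document under the versioned id field ('_id_document' with Eve's defaults). Overwriting _id with that pointer makes the endpoint present the parent document's id instead of the internal row id. A one-line illustration with made-up values:

version_row = {'_id': 'row-42', '_id_document': 'article-1', '_current_version': 3}
version_row['_id'] = version_row['_id_document']  # clients now see the parent id
assert version_row['_id'] == 'article-1'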
Example #7
    def _get_legal_archive_details(self, article_id, publishing_action=None):
        archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
        publish_queue_service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)

        article = archive_service.find_one(_id=article_id, req=None)
        article_versions = archive_versions_service.get(req=None, lookup={versioned_id_field(): article_id})

        lookup = {'item_id': article_id, 'publishing_action': publishing_action} if publishing_action else \
            {'item_id': article_id}
        queue_items = publish_queue_service.get(req=None, lookup=lookup)

        return article, article_versions, queue_items
Example #8
    def remove_expired(self, doc):
        """
        Removes the article from production if the state is spiked
        """

        assert doc[ITEM_STATE] == CONTENT_STATE.SPIKED, \
            "Article state is %s. Only Spiked Articles can be removed" % doc[ITEM_STATE]

        doc_id = str(doc[config.ID_FIELD])
        resource_def = app.config['DOMAIN']['archive_versions']
        get_resource_service('archive_versions').delete(
            lookup={versioned_id_field(resource_def): doc_id})
        super().delete_action({config.ID_FIELD: doc_id})
Example #9
    def get(self, req, lookup):
        resource_def = app.config['DOMAIN']['items']
        id_field = versioned_id_field(resource_def)

        if req and req.args and req.args.get(config.ID_FIELD):
            version_history = list(super().get_from_mongo(
                req=ParsedRequest(),
                lookup={id_field: req.args.get(config.ID_FIELD)}))
        else:
            version_history = list(super().get_from_mongo(req=req,
                                                          lookup=lookup))

        for doc in version_history:
            doc[config.ID_FIELD] = doc[id_field]

        return ListCursor(version_history)
Example #10
    def remove_expired(self, doc):
        """
        Removes the expired published article from the 'published' collection. The workflow is:
            1.  Update the allow_post_publish_actions and can_be_removed flags.
            2.  Insert/update the article in the Legal Archive repository:
                (a) All references to master data like users, desks etc. are de-normalized and then
                    inserted into the Legal Archive. The same is done to each version of the article.
                (b) Inserts Formatted Items
                (c) Inserts Transmission Details (fetched from the publish_queue collection)
            3.  Removes the item from the publish_queue and published collections, if can_be_removed is True
            4.  Removes the article and its versions from the archive collection if all of the below conditions are met:
                (a) Article hasn't been published/corrected/killed again
                (b) Article isn't part of a package

        :param doc: doc in 'published' collection
        """

        logging.info("Starting the workflow for removing the expired publish item with id: %s" % doc['item_id'])

        can_be_removed = doc['can_be_removed']

        if not can_be_removed:
            # Step 1
            updates = self._update_flags(doc)
            doc.update(updates)
            can_be_removed = updates.get('can_be_removed', can_be_removed)

            # Step 2
            publish_queue_items = self._upsert_into_legal_archive(doc)
            for publish_queue_item in publish_queue_items:
                get_resource_service('publish_queue').delete_action(
                    lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

            # Step 4
            if self.can_remove_from_production(doc):
                resource_def = app.config['DOMAIN']['archive']
                lookup = {'$and': [{versioned_id_field(resource_def): doc['item_id']},
                                   {config.VERSION: {'$lte': doc[config.VERSION]}}]}
                get_resource_service('archive_versions').delete(lookup)

                get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

        if can_be_removed:
            # Step 3
            self.delete_by_article_id(_id=doc['item_id'], doc=doc)

        logging.info("Completed the workflow for removing the expired publish item with id: %s" % doc['item_id'])
Example #11
    def _get_legal_archive_details(self, article_id, publishing_action=None):
        archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
        publish_queue_service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)

        article = archive_service.find_one(_id=article_id, req=None)
        resource_def = self.app.config['DOMAIN'][LEGAL_ARCHIVE_VERSIONS_NAME]
        version_id = versioned_id_field(resource_def)
        article_versions = archive_versions_service.get(req=None, lookup={version_id: article_id})

        lookup = {'item_id': article_id}
        if publishing_action:
            lookup['publishing_action'] = publishing_action

        queue_items = publish_queue_service.get(req=None, lookup=lookup)

        return article, article_versions, queue_items
Example #12
    def remove_expired(self, doc):
        """
        Removes the expired published article from the 'published' collection. The workflow is:
            1.  If the type of the article is either text or pre-formatted, a copy is inserted into the Text Archive
            2.  Inserts/updates the article in the Legal Archive repository:
                (a) All references to master data like users, desks, destination groups etc. are de-normalized
                    and then inserted into the Legal Archive. The same is done to each version of the article.
                (b) Inserts Formatted Items
                (c) Inserts Transmission Details (fetched from the publish_queue collection)
            3.  Removes the item from the formatted_item, publish_queue and published collections
            4.  Removes the article and its versions from the archive collection if all of the below conditions are met:
                (a) Article hasn't been published/corrected/killed again
                (b) Article isn't part of a package

        :param doc: doc in 'published' collection
        """

        logging.info("Starting the workflow for removing the expired publish item with id: %s" % doc['item_id'])

        # Step 1
        if 'type' in doc and doc['type'] in ['text', 'preformatted']:
            self._insert_into_or_remove_from_text_archive(doc)

        # Step 2
        formatted_item_ids, publish_queue_items = self._upsert_into_legal_archive(doc)
        for formatted_item_id in formatted_item_ids:
            get_resource_service('formatted_item').delete_action(lookup={config.ID_FIELD: formatted_item_id})

        for publish_queue_item in publish_queue_items:
            get_resource_service('publish_queue').delete_action(
                lookup={config.ID_FIELD: publish_queue_item[config.ID_FIELD]})

        # Step 3
        self.delete_by_article_id(_id=doc['item_id'], doc=doc)

        # Step 4
        items = self.get_other_published_items(doc['item_id'])
        if items.count() == 0 and self.__is_orphan(doc):
            lookup = {'$and': [{versioned_id_field(): doc['item_id']}, {config.VERSION: {'$lte': doc[config.VERSION]}}]}
            get_resource_service('archive_versions').delete(lookup)

            get_resource_service(ARCHIVE).delete_action({config.ID_FIELD: doc['item_id']})

        logging.info("Completed the workflow for removing the expired publish item with id: %s" % doc['item_id'])
Example #13
    def get(self, req, lookup):
        """
        Versions of an article in the Legal Archive aren't maintained by Eve. Overridden to fetch the version history.
        """

        resource_def = app.config['DOMAIN'][LEGAL_ARCHIVE_NAME]
        id_field = versioned_id_field(resource_def)

        if req and req.args and req.args.get(config.ID_FIELD):
            version_history = list(super().get_from_mongo(req=ParsedRequest(),
                                                          lookup={id_field: req.args.get(config.ID_FIELD)}))
        else:
            version_history = list(super().get_from_mongo(req=req, lookup=lookup))

        for doc in version_history:
            doc[config.ID_FIELD] = doc[id_field]
            self.enhance(doc)

        return ListCursor(version_history)
Example #14
    def _duplicate_history(self, old_id, new_doc):
        """Duplicates history for an item.

        Duplicates the history of the article identified by old_id. Each history item's
        identifiers are changed to match those of new_doc.

        :param old_id: identifier used to fetch the history
        :param new_doc: identifiers from this doc are used to create the history for the duplicated item.
        """
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        old_history_items = get_resource_service('archive_history').get(req=None, lookup={'item_id': old_id})

        new_history_items = []
        for old_history_item in old_history_items:
            old_history_item[version_id] = new_doc[config.ID_FIELD]
            del old_history_item[config.ID_FIELD]
            old_history_item['item_id'] = new_doc['guid']
            new_history_items.append(old_history_item)

        if new_history_items:
            get_resource_service('archive_history').post(new_history_items)
Example #15
 def get_version(self, id, version, formatter_name):
     formatter = self._get_formatter(formatter_name)
     if not formatter:
         abort(404)
     if version:
         item = get_resource_service('items_versions').find_one(
             req=None, _id_document=id, version=version)
         if not item:
             abort(404)
         resource_def = app.config['DOMAIN']['items']
         id_field = versioned_id_field(resource_def)
         item['_id'] = item[id_field]
     else:
         item = get_resource_service('items').find_one(req=None, _id=id)
         if not item:
             abort(404)
     # Ensure that the item has not expired
     if utcnow() - timedelta(
             days=int(get_setting('news_api_time_limit_days'))) > item.get(
                 'versioncreated', utcnow()):
         abort(404)
     ret = formatter.format_item(item)
     return {'formatted_item': ret, 'mimetype': formatter.MIMETYPE}
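The guard near the end of the snippet treats an item as expired once its 'versioncreated' falls outside a sliding window of news_api_time_limit_days. A minimal standalone sketch of that check (the helper name is illustrative, not part of the source):

from datetime import datetime, timedelta, timezone

def is_expired(versioncreated, time_limit_days):
    # expired once versioncreated predates the configured cutoff
    cutoff = datetime.now(timezone.utc) - timedelta(days=int(time_limit_days))
    return versioncreated < cutoff

recent = datetime.now(timezone.utc) - timedelta(days=2)
assert not is_expired(recent, 30)  # inside a 30-day window
assert is_expired(recent, 1)       # outside a 1-day window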
Example #16
File: get.py, Project: kidaa/eve
def getitem(resource, **lookup):
    """
    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.6
       Handle soft deleted documents

    .. versionchanged:: 0.5
       Allow ``?version=all`` requests to fire ``on_fetched_*`` events.
       Create pagination links for document versions. (#475)
       Pagination links reflect current query. (#464)

    .. versionchanged:: 0.4
       HATEOAS link for the document contains the business unit value even
       when regexes have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embedded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged:: 0.0.8
       'on_getting_item' event is raised when a document has been read from the
       database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
        ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they were
       created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superfluous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    soft_delete_enabled = config.DOMAIN[resource]['soft_delete']
    if soft_delete_enabled:
        # GET requests should always fetch soft deleted documents from the db
        # They are handled and included in 404 responses below.
        req.show_deleted = True

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None
    cursor = None

    # calculate last_modified before get_old_document rolls back the document,
    # allowing us to invalidate the cache when _latest_version changes
    last_modified = last_updated(document)

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = document
        document = get_old_document(
            resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)
    if config.IF_MATCH:
        etag = document[config.ETAG]

    # check embedded fields resolved in build_response_document() for more
    # recent last updated timestamps. We don't want to respond 304 if embedded
    # fields have changed
    for field in embedded_fields:
        embedded_document = document.get(field)
        if isinstance(embedded_document, dict):
            embedded_last_updated = last_updated(embedded_document)
            if embedded_last_updated > last_modified:
                last_modified = embedded_last_updated

    # facilitate client caching by returning a 304 when appropriate
    cache_validators = {True: 0, False: 0}
    if req.if_modified_since:
        cache_valid = (last_modified <= req.if_modified_since)
        cache_validators[cache_valid] += 1
    if req.if_none_match:
        if (resource_def['versioning'] is False) or \
           (document[app.config['VERSION']] ==
                document[app.config['LATEST_VERSION']]):
            cache_valid = (etag == req.if_none_match)
            cache_validators[cache_valid] += 1
    # If all cache validators are true, return 304
    if (cache_validators[True] > 0) and (cache_validators[False] == 0):
        return {}, last_modified, etag, 304

    if version == 'all' or version == 'diffs':
        # find all versions
        lookup[versioned_id_field()] = lookup[app.config['ID_FIELD']]
        del lookup[app.config['ID_FIELD']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(diff_document(
                            resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    elif soft_delete_enabled and document.get(config.DELETED) is True:
        # This document was soft deleted. Respond with 404 and the deleted
        # version of the document.
        document[config.STATUS] = config.STATUS_ERR
        document[config.ERROR] = {
            'code': 404,
            'message': 'The requested URL was not found on this server.'
        }
        return document, last_modified, etag, 404
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        # use the id of the latest document for multi-document requests
        if cursor:
            count = cursor.count(with_limit_and_skip=False)
            response[config.LINKS] = \
                _pagination_links(resource, req, count,
                                  latest_doc[config.ID_FIELD])
            if config.DOMAIN[resource]['pagination']:
                response[config.META] = _meta_links(req, count)
        else:
            response[config.LINKS] = \
                _pagination_links(resource, req, None,
                                  response[config.ID_FIELD])

    # callbacks not supported on version diffs because of partial documents
    if version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should
        # the functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        if resource_def['versioning'] is True and version == 'all':
            versions = response
            if config.DOMAIN[resource]['hateoas']:
                versions = response[config.ITEMS]
            for version_item in versions:
                getattr(app, "on_fetched_item")(resource, version_item)
                getattr(app, "on_fetched_item_%s" % resource)(version_item)
        else:
            getattr(app, "on_fetched_item")(resource, response)
            getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
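From the client side, the branches above are driven by Eve's version query parameter (VERSION_PARAM, 'version' by default). A hedged sketch using the requests library against a hypothetical items endpoint:

import requests

BASE = 'http://localhost:5000/items/571ab0a6e249c759f2bef1bb'  # hypothetical endpoint and id

latest = requests.get(BASE).json()                               # current document
history = requests.get(BASE, params={'version': 'all'}).json()   # every stored version
deltas = requests.get(BASE, params={'version': 'diffs'}).json()  # first version, then per-version diffs
third = requests.get(BASE, params={'version': 3}).json()         # one specific version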
Example #17
File: get.py, Project: Acapla/eve
def getitem(resource, **lookup):
    """
    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.4
       HATEOAS link for the document contains the business unit value even
       when regexes have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embedded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged:: 0.0.8
       'on_getting_item' event is raised when a document has been read from the
       database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
        ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they were
       created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superfluous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = copy.deepcopy(document)
        document = get_old_document(
            resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)

    # last_modified for the response
    last_modified = document[config.LAST_UPDATED]

    # facilitate client caching by returning a 304 when appropriate
    if config.IF_MATCH:
        etag = document[config.ETAG]

        if req.if_none_match and etag == req.if_none_match:
            # request etag matches the current server representation of the
            # document, return a 304 Not-Modified.
            return {}, last_modified, document[config.ETAG], 304

    if req.if_modified_since and last_modified <= req.if_modified_since:
        # request If-Modified-Since conditional request match. We test
        # this after the etag since Last-Modified dates have lower
        # resolution (1 second).
        return {}, last_modified, document.get(config.ETAG), 304

    if version == 'all' or version == 'diffs':
        # find all versions
        lookup[versioned_id_field()] = lookup[app.config['ID_FIELD']]
        del lookup[app.config['ID_FIELD']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(diff_document(
                            resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        if config.LINKS not in response:
            response[config.LINKS] = {}
        response[config.LINKS]['collection'] = {
            'title': config.DOMAIN[resource]['resource_title'],
            'href': resource_link()}
        response[config.LINKS]['parent'] = home_link()

    if version != 'all' and version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should
        # the functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        getattr(app, "on_fetched_item")(resource, response)
        getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
Example #18
    def upsert_into_legal_archive(self, item_id):
        """
        Once publish actions are performed on the article, do the below:
            1.  Get the legal archive article.
            2.  De-normalize the expired article.
            3.  Upsert into the Legal Archive.
            4.  Get the version history, de-normalize it and insert into Legal Archive Versions.

        :param item_id: id of the document from the 'archive' collection.
        """
        try:

            logger.warning('Import item into legal {}.'.format(item_id))

            doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)

            if not doc:
                logger.error(
                    'Could not find the document {} to import to legal archive.'
                    .format(item_id))
                return

            # setting default values in case they are missing, otherwise the log message will fail.
            doc.setdefault('unique_name', 'NO UNIQUE NAME')
            doc.setdefault(config.VERSION, 1)
            doc.setdefault('expiry', utcnow())

            if doc.get(ITEM_STATE) not in {
                    CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                    CONTENT_STATE.KILLED
            }:
                # at times we have seen that the item is published but its state is different in the archive
                # collection. This will notify admins about the issue but proceed to move the item into legal archive.
                msg = 'Invalid state: {}. Moving the item to legal archive. item: {}'.\
                    format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc))
                logger.error(msg)
                update_notifiers(ACTIVITY_ERROR, msg=msg, resource=ARCHIVE)

            # required for behave test.
            legal_archive_doc = deepcopy(doc)
            legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
            legal_archive_versions_service = get_resource_service(
                LEGAL_ARCHIVE_VERSIONS_NAME)

            log_msg = self.log_msg_format.format(**legal_archive_doc)
            version_id_field = versioned_id_field(
                app.config['DOMAIN'][ARCHIVE])
            logger.info(
                'Preparing Article to be inserted into Legal Archive ' +
                log_msg)

            # Removing irrelevant properties
            legal_archive_doc.pop(config.ETAG, None)
            legal_archive_doc.pop('lock_user', None)
            legal_archive_doc.pop('lock_session', None)
            legal_archive_doc.pop('lock_time', None)

            logger.info(
                'Removed irrelevant properties from the article {}'.format(
                    log_msg))

            # Step 1
            article_in_legal_archive = legal_archive_service.find_one(
                req=None, _id=legal_archive_doc[config.ID_FIELD])

            if article_in_legal_archive and \
               article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(config.VERSION):
                logger.info(
                    'Item {} version: {} already in legal archive. Legal Archive document version {}'
                    .format(legal_archive_doc.get(config.ID_FIELD),
                            legal_archive_doc.get(config.VERSION),
                            article_in_legal_archive.get(config.VERSION)))
                self._set_moved_to_legal(doc)
                return

            # Step 2 - De-normalizing the legal archive doc
            self._denormalize_user_desk(legal_archive_doc, log_msg)
            logger.info('De-normalized article {}'.format(log_msg))

            # Step 3 - Upserting Legal Archive
            logger.info(
                'Upserting Legal Archive Repo with article {}'.format(log_msg))

            if article_in_legal_archive:
                legal_archive_service.put(legal_archive_doc[config.ID_FIELD],
                                          legal_archive_doc)
            else:
                legal_archive_service.post([legal_archive_doc])

            # Step 4 - Get Version History and De-normalize and Inserting Legal Archive Versions
            lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
            version_history = list(
                get_resource_service('archive_versions').get(req=None,
                                                             lookup=lookup))
            legal_version_history = list(
                legal_archive_versions_service.get(req=None, lookup=lookup))

            logger.info(
                'Fetched version history for article {}'.format(log_msg))
            versions_to_insert = [
                version for version in version_history
                if not any(legal_version
                           for legal_version in legal_version_history
                           if version[config.VERSION] == legal_version[
                               config.VERSION])
            ]

            # This happens when user kills an article from Dusty Archive
            if article_in_legal_archive and \
               article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION] and \
               len(versions_to_insert) == 0:

                resource_def = app.config['DOMAIN'][ARCHIVE]
                versioned_doc = deepcopy(legal_archive_doc)
                versioned_doc[versioned_id_field(
                    resource_def)] = legal_archive_doc[config.ID_FIELD]
                versioned_doc[config.ID_FIELD] = ObjectId()
                versions_to_insert.append(versioned_doc)

            for version_doc in versions_to_insert:
                self._denormalize_user_desk(
                    version_doc,
                    self.log_msg_format.format(
                        _id=version_doc[version_id_field],
                        unique_name=version_doc.get('unique_name'),
                        _current_version=version_doc[config.VERSION],
                        expiry=version_doc.get('expiry')))
                version_doc.pop(config.ETAG, None)

            if versions_to_insert:
                legal_archive_versions_service.post(versions_to_insert)
                logger.info(
                    'Inserted de-normalized version history for article {}'.
                    format(log_msg))

            # Set the flag that item is moved to legal.
            self._set_moved_to_legal(doc)

            logger.info('Upsert completed for article ' + log_msg)
        except:
            logger.exception(
                'Failed to import into legal archive {}.'.format(item_id))
            raise
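The versions_to_insert filter above runs any() over the legal version history once per version, which is quadratic in the number of versions. A set of the version numbers already present in the legal archive gives the same de-duplication in linear time; this sketch uses toy data and assumes config.VERSION resolves to Eve's '_current_version':

version_history = [{'_current_version': 1}, {'_current_version': 2}, {'_current_version': 3}]
legal_version_history = [{'_current_version': 1}, {'_current_version': 2}]

already_in_legal = {v['_current_version'] for v in legal_version_history}
versions_to_insert = [v for v in version_history
                      if v['_current_version'] not in already_in_legal]

assert versions_to_insert == [{'_current_version': 3}]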
Example #19
def getitem(resource, **lookup):
    """
    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.4
       HATEOAS link for the document contains the business unit value even
       when regexes have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embedded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged:: 0.0.8
       'on_getting_item' event is raised when a document has been read from the
       database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
        ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they were
       created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superfluous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = copy.deepcopy(document)
        document = get_old_document(resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)

    # last_modified for the response
    last_modified = document[config.LAST_UPDATED]

    # facilitate client caching by returning a 304 when appropriate
    if config.IF_MATCH:
        etag = document[config.ETAG]

        if req.if_none_match and etag == req.if_none_match:
            # request etag matches the current server representation of the
            # document, return a 304 Not-Modified.
            return {}, last_modified, document[config.ETAG], 304

    if req.if_modified_since and last_modified <= req.if_modified_since:
        # request If-Modified-Since conditional request match. We test
        # this after the etag since Last-Modified dates have lower
        # resolution (1 second).
        return {}, last_modified, document.get(config.ETAG), 304

    if version == 'all' or version == 'diffs':
        # find all versions
        lookup[versioned_id_field()] = lookup[app.config['ID_FIELD']]
        del lookup[app.config['ID_FIELD']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(resource, req, lookup,
                                                 latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(document, resource, embedded_fields,
                                        latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(
                            diff_document(resource_def, last_document,
                                          document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        if config.LINKS not in response:
            response[config.LINKS] = {}
        response[config.LINKS]['collection'] = {
            'title': config.DOMAIN[resource]['resource_title'],
            'href': resource_link()
        }
        response[config.LINKS]['parent'] = home_link()

    if version != 'all' and version != 'diffs':
        # TODO: callbacks not currently supported with ?version=all

        # notify registered callback functions. Please note that, should
        # the functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        getattr(app, "on_fetched_item")(resource, response)
        getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
Example #20
    def upsert_into_legal_archive(self, item_id):
        """Once publish actions are performed on the article do the below:

            1.  Get legal archive article.
            2.  De-normalize the expired article
            3.  Upserting Legal Archive.
            4.  Get Version History and De-normalize and Inserting Legal Archive Versions
            5.  Get History and de-normalize and insert into Legal Archive History

        :param dict item_id: id of the document from 'archive' collection.
        """
        try:

            logger.info('Import item into legal {}.'.format(item_id))

            doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)

            if not doc:
                logger.error('Could not find the document {} to import to legal archive.'.format(item_id))
                return

            # setting default values in case they are missing, otherwise the log message will fail.
            doc.setdefault('unique_name', 'NO UNIQUE NAME')
            doc.setdefault(config.VERSION, 1)
            doc.setdefault('expiry', utcnow())

            if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
                # at times we have seen that the item is published but its state is different in the archive
                # collection. This will notify admins about the issue but proceed to move the item into legal archive.
                msg = 'Invalid state: {}. Moving the item to legal archive. item: {}'.\
                    format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc))
                logger.error(msg)
                update_notifiers(ACTIVITY_ERROR, msg=msg, resource=ARCHIVE)

            # required for behave test.
            legal_archive_doc = deepcopy(doc)
            legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
            legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
            legal_archive_history_service = get_resource_service(LEGAL_ARCHIVE_HISTORY_NAME)

            log_msg = self.log_msg_format.format(**legal_archive_doc)
            version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
            logger.info('Preparing Article to be inserted into Legal Archive ' + log_msg)

            # Removing irrelevant properties
            legal_archive_doc.pop(config.ETAG, None)
            legal_archive_doc.pop('lock_user', None)
            legal_archive_doc.pop('lock_session', None)
            legal_archive_doc.pop('lock_time', None)
            legal_archive_doc.pop('lock_action', None)

            logger.info('Removed irrelevant properties from the article {}'.format(log_msg))

            # Step 1
            article_in_legal_archive = legal_archive_service.find_one(req=None, _id=legal_archive_doc[config.ID_FIELD])

            if article_in_legal_archive and \
               article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(config.VERSION):
                logger.info('Item {} version: {} already in legal archive. Legal Archive document version {}'.format(
                    legal_archive_doc.get(config.ID_FIELD), legal_archive_doc.get(config.VERSION),
                    article_in_legal_archive.get(config.VERSION)
                ))
                self._set_moved_to_legal(doc)
                return

            # Step 2 - De-normalizing the legal archive doc
            self._denormalize_user_desk(legal_archive_doc, log_msg)
            logger.info('De-normalized article {}'.format(log_msg))

            # Step 3 - Upserting Legal Archive
            logger.info('Upserting Legal Archive Repo with article {}'.format(log_msg))

            if article_in_legal_archive:
                legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
            else:
                legal_archive_service.post([legal_archive_doc])

            # Step 4 - Get Versions and De-normalize and Inserting Legal Archive Versions
            lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
            versions = list(get_resource_service('archive_versions').get(req=None, lookup=lookup))
            legal_versions = list(legal_archive_versions_service.get(req=None, lookup=lookup))

            logger.info('Fetched version history for article {}'.format(log_msg))
            versions_to_insert = [version for version in versions
                                  if not any(legal_version for legal_version in legal_versions
                                             if version[config.VERSION] == legal_version[config.VERSION])]

            # Step 5 - Get History and de-normalize and insert into Legal Archive History
            lookup = {'item_id': legal_archive_doc[config.ID_FIELD]}
            history_items = list(get_resource_service('archive_history').get(req=None, lookup=lookup))
            legal_history_items = list(legal_archive_history_service.get(req=None, lookup=lookup))

            logger.info('Fetched history for article {}'.format(log_msg))
            history_to_insert = [history for history in history_items
                                 if not any(legal_version for legal_version in legal_history_items
                                            if history[config.ID_FIELD] == legal_version[config.ID_FIELD])]

            # This happens when user kills an article from Dusty Archive
            if article_in_legal_archive and \
               article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION] and \
               len(versions_to_insert) == 0:

                resource_def = app.config['DOMAIN'][ARCHIVE]
                versioned_doc = deepcopy(legal_archive_doc)
                versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
                versioned_doc[config.ID_FIELD] = ObjectId()
                versions_to_insert.append(versioned_doc)

            for version_doc in versions_to_insert:
                self._denormalize_user_desk(version_doc,
                                            self.log_msg_format.format(_id=version_doc[version_id_field],
                                                                       unique_name=version_doc.get('unique_name'),
                                                                       _current_version=version_doc[config.VERSION],
                                                                       expiry=version_doc.get('expiry')))
                version_doc.pop(config.ETAG, None)

            if versions_to_insert:
                legal_archive_versions_service.post(versions_to_insert)
                logger.info('Inserted de-normalized versions for article {}'.format(log_msg))

            for history_doc in history_to_insert:
                self._denormalize_history(history_doc)
                history_doc.pop(config.ETAG, None)

            if history_to_insert:
                legal_archive_history_service.post(history_to_insert)
                logger.info('Inserted de-normalized history for article {}'.format(log_msg))

            # Set the flag that item is moved to legal.
            self._set_moved_to_legal(doc)

            logger.info('Upsert completed for article ' + log_msg)
        except:
            logger.exception('Failed to import into legal archive {}.'.format(item_id))
            raise
Example #21
    def upsert_into_legal_archive(self, item_id):
        """
        Once publish actions are performed on the article, do the below:
            1.  Get the legal archive article.
            2.  De-normalize the expired article.
            3.  Upsert into the Legal Archive.
            4.  Get the version history, de-normalize it and insert into Legal Archive Versions.

        :param item_id: id of the document from the 'archive' collection.
        """
        try:

            logger.warning("Import item into legal {}.".format(item_id))

            doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)

            if not doc:
                logger.error("Could not find the document {} to import to legal archive.".format(item_id))
                return

            if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
                logger.error(
                    "Invalid state: {}. Cannot move the item to legal archive. item: {}".format(
                        doc.get(ITEM_STATE), self.log_msg_format.format(**doc)
                    )
                )
                return

            # required for behave test.
            legal_archive_doc = deepcopy(doc)

            legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
            legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)

            log_msg = self.log_msg_format.format(**legal_archive_doc)
            version_id_field = versioned_id_field(app.config["DOMAIN"][ARCHIVE])
            logger.info("Preparing Article to be inserted into Legal Archive " + log_msg)

            # Removing irrelevant properties
            legal_archive_doc.pop(config.ETAG, None)
            legal_archive_doc.pop("lock_user", None)
            legal_archive_doc.pop("lock_session", None)
            legal_archive_doc.pop("lock_time", None)

            logger.info("Removed irrelevant properties from the article {}".format(log_msg))

            # Step 1
            article_in_legal_archive = legal_archive_service.find_one(req=None, _id=legal_archive_doc[config.ID_FIELD])

            if article_in_legal_archive and article_in_legal_archive.get(config.VERSION, 0) > legal_archive_doc.get(
                config.VERSION
            ):
                logger.info(
                    "Item {} version: {} already in legal archive. Legal Archive document version {}".format(
                        legal_archive_doc.get(config.ID_FIELD),
                        legal_archive_doc.get(config.VERSION),
                        article_in_legal_archive.get(config.VERSION),
                    )
                )
                self._set_moved_to_legal(doc)
                return

            # Step 2 - De-normalizing the legal archive doc
            self._denormalize_user_desk(legal_archive_doc, log_msg)
            logger.info("De-normalized article {}".format(log_msg))

            # Step 3 - Upserting Legal Archive
            logger.info("Upserting Legal Archive Repo with article {}".format(log_msg))

            if article_in_legal_archive:
                legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
            else:
                legal_archive_service.post([legal_archive_doc])

            # Step 4 - Get Version History and De-normalize and Inserting Legal Archive Versions
            lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
            version_history = list(get_resource_service("archive_versions").get(req=None, lookup=lookup))
            legal_version_history = list(legal_archive_versions_service.get(req=None, lookup=lookup))

            logger.info("Fetched version history for article {}".format(log_msg))
            versions_to_insert = [
                version
                for version in version_history
                if not any(
                    legal_version
                    for legal_version in legal_version_history
                    if version[config.VERSION] == legal_version[config.VERSION]
                )
            ]

            # This happens when user kills an article from Dusty Archive
            if (
                article_in_legal_archive
                and article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION]
                and len(versions_to_insert) == 0
            ):

                resource_def = app.config["DOMAIN"][ARCHIVE]
                versioned_doc = deepcopy(legal_archive_doc)
                versioned_doc[versioned_id_field(resource_def)] = legal_archive_doc[config.ID_FIELD]
                versioned_doc[config.ID_FIELD] = ObjectId()
                versions_to_insert.append(versioned_doc)

            for version_doc in versions_to_insert:
                self._denormalize_user_desk(
                    version_doc,
                    self.log_msg_format.format(
                        _id=version_doc[version_id_field],
                        unique_name=version_doc.get("unique_name"),
                        _current_version=version_doc[config.VERSION],
                        expiry=version_doc.get("expiry"),
                    ),
                )
                version_doc.pop(config.ETAG, None)

            if versions_to_insert:
                legal_archive_versions_service.post(versions_to_insert)
                logger.info("Inserted de-normalized version history for article {}".format(log_msg))

            # Set the flag that item is moved to legal.
            self._set_moved_to_legal(doc)

            logger.info("Upsert completed for article " + log_msg)
        except Exception:
            logger.exception("Failed to import into legal archive {}.".format(item_id))
            raise
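
Steps 1 and 3 above amount to a find-then-put-or-post upsert. A hedged sketch of that decision against a generic resource service (the `service` argument is a stand-in for the Superdesk services above, not a documented API):

def upsert(service, doc, id_field='_id'):
    # Fetch by id, then replace if present, insert otherwise. `service`
    # is assumed to expose find_one/put/post like the services above.
    existing = service.find_one(req=None, _id=doc[id_field])
    if existing:
        service.put(doc[id_field], doc)
    else:
        service.post([doc])
    return existing is not None
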
Example #24
    def _init_article_versions(self):
        resource_def = self.app.config['DOMAIN']['archive_versions']
        version_id = versioned_id_field(resource_def)
        return [{'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 version_id: '1',
                 ITEM_TYPE: CONTENT_TYPE.TEXT,
                 config.VERSION: 1,
                 'urgency': 4,
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'dateline': {'located': {'city': 'Sydney'}},
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 ITEM_STATE: CONTENT_STATE.DRAFT,
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 version_id: '1',
                 ITEM_TYPE: CONTENT_TYPE.TEXT,
                 config.VERSION: 2,
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'dateline': {'located': {'city': 'Sydney'}},
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 ITEM_STATE: CONTENT_STATE.SUBMITTED,
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 version_id: '1',
                 ITEM_TYPE: CONTENT_TYPE.TEXT,
                 config.VERSION: 3,
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article',
                 'dateline': {'located': {'city': 'Sydney'}},
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 ITEM_STATE: CONTENT_STATE.PROGRESS,
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 version_id: '1',
                 ITEM_TYPE: CONTENT_TYPE.TEXT,
                 config.VERSION: 4,
                 'body_html': 'Test body',
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article',
                 'dateline': {'located': {'city': 'Sydney'}},
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 ITEM_STATE: CONTENT_STATE.PROGRESS,
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'}]
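
For context on the `versioned_id_field(resource_def)` call used in this fixture: Eve stores a versioned document's parent id under the resource's id field plus a configurable suffix. A rough, hedged approximation (the '_document' default is an assumption based on Eve's stock settings):

def versioned_id_field_sketch(resource_def, suffix='_document'):
    # Approximation of Eve's versioned_id_field(): versions reference
    # their parent via "<id_field><VERSION_ID_SUFFIX>", which resolves
    # to "_id_document" with Eve's defaults.
    return resource_def.get('id_field', '_id') + suffix

assert versioned_id_field_sketch({'id_field': '_id'}) == '_id_document'
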
Example #25
def deleteitem_internal(
        resource, concurrency_check=False, suppress_callbacks=False, **lookup):
    """ Intended for internal delete calls, this method is not rate limited,
    authentication is not checked, pre-request events are not raised, and
    concurrency checking is optional. Deletes a resource item.

    :param resource: name of the resource to which the item(s) belong.
    :param concurrency_check: concurrency check switch (bool)
    :param **lookup: item lookup query.

    .. versionchanged:: 0.6
       Support for soft delete.

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.
       Push updates to OpLog.
       Original deleteitem() has been split into deleteitem() and
       deleteitem_internal().

    .. versionchanged:: 0.4
       Fix #284: If you have a media field, and set datasource projection to
       0 for that field, the media will not be deleted.
       Support for document versioning.
       'on_delete_item' events raised before performing the delete.
       'on_deleted_item' events raised after performing the delete.

    .. versionchanged:: 0.3
       Delete media files as needed.
       Pass the explicit query filter to the data driver, as it does not
       support the id argument anymore.

    .. versionchanged:: 0.2
       Raise pre_<method> event.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.5
      Pass current resource to ``parse_request``, allowing for proper
      processing of new configuration settings: `filters`, `sorting`, `paging`.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.
    """
    resource_def = config.DOMAIN[resource]
    soft_delete_enabled = resource_def['soft_delete']
    original = get_document(resource, concurrency_check, **lookup)
    if not original or (soft_delete_enabled and
                        original.get(config.DELETED) is True):
        abort(404)

    # notify callbacks
    if suppress_callbacks is not True:
        getattr(app, "on_delete_item")(resource, original)
        getattr(app, "on_delete_item_%s" % resource)(original)

    if soft_delete_enabled:
        # Instead of removing the document from the db, just mark it as deleted
        marked_document = copy.deepcopy(original)

        # Set DELETED flag and update metadata
        last_modified = datetime.utcnow().replace(microsecond=0)
        marked_document[config.DELETED] = True
        marked_document[config.LAST_UPDATED] = last_modified

        if config.IF_MATCH:
            resolve_document_etag(marked_document, resource)

        resolve_document_version(marked_document, resource, 'DELETE', original)

        # Update document in database (including version collection if needed)
        id = original[resource_def['id_field']]
        try:
            app.data.replace(resource, id, marked_document, original)
        except app.data.OriginalChangedError:
            if concurrency_check:
                abort(412, description='Client and server etags don\'t match')

        # create previous version if it wasn't already there
        late_versioning_catch(original, resource)
        # and add deleted version
        insert_versioning_documents(resource, marked_document)
        # update oplog if needed
        oplog_push(resource, marked_document, 'DELETE', id)

    else:
        # Delete the document for real

        # media cleanup
        media_fields = app.config['DOMAIN'][resource]['_media']

        # document might miss one or more media fields because of datasource
        # and/or client projection.
        missing_media_fields = [f for f in media_fields if f not in original]
        if missing_media_fields:
            # retrieve the whole document so we have all media fields available
            # Should be a very rare occurrence. We can't get rid of the
            # get_document() call since it also deals with etag matching, which
            # is still needed. Also, this lookup should never fail.
            # TODO not happy with this hack. Not at all. Is there a better way?
            original = app.data.find_one_raw(
                resource, original[resource_def['id_field']])

        for field in media_fields:
            if field in original:
                app.media.delete(original[field], resource)

        id = original[resource_def['id_field']]
        app.data.remove(resource, {resource_def['id_field']: id})

        # TODO: should attempt to delete version collection even if setting is
        # off
        if app.config['DOMAIN'][resource]['versioning'] is True:
            app.data.remove(
                resource + config.VERSIONS,
                {versioned_id_field(resource_def):
                 original[resource_def['id_field']]})

        # update oplog if needed
        oplog_push(resource, original, 'DELETE', id)

    if suppress_callbacks is not True:
        getattr(app, "on_deleted_item")(resource, original)
        getattr(app, "on_deleted_item_%s" % resource)(original)

    return {}, None, None, 204
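
The soft-delete branch above never removes the row; it replaces it with a copy flagged as deleted. A minimal sketch of the marking step, assuming Eve's default '_deleted'/'_updated' field names:

import copy
from datetime import datetime

def mark_deleted(original, deleted_field='_deleted', updated_field='_updated'):
    # Copy the document, set the deleted flag, and refresh the
    # last-updated timestamp, mirroring the soft-delete branch above.
    marked = copy.deepcopy(original)
    marked[deleted_field] = True
    marked[updated_field] = datetime.utcnow().replace(microsecond=0)
    return marked
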
Example #27
def deleteitem_internal(resource, concurrency_check=False, **lookup):
    """ Intended for internal delete calls, this method is not rate limited,
    authentication is not checked, pre-request events are not raised, and
    concurrency checking is optional. Deletes a resource item.

    :param resource: name of the resource to which the item(s) belong.
    :param concurrency_check: concurrency check switch (bool)
    :param **lookup: item lookup query.

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.
       Push updates to OpLog.
       Original deleteitem() has been split into deleteitem() and
       deleteitem_internal().

    .. versionchanged:: 0.4
       Fix #284: If you have a media field, and set datasource projection to
       0 for that field, the media will not be deleted.
       Support for document versioning.
       'on_delete_item' events raised before performing the delete.
       'on_deleted_item' events raised after performing the delete.

    .. versionchanged:: 0.3
       Delete media files as needed.
       Pass the explicit query filter to the data driver, as it does not
       support the id argument anymore.

    .. versionchanged:: 0.2
       Raise pre_<method> event.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.5
      Pass current resource to ``parse_request``, allowing for proper
      processing of new configuration settings: `filters`, `sorting`, `paging`.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.
    """
    original = get_document(resource, concurrency_check, **lookup)
    if not original:
        abort(404)

    # notify callbacks
    getattr(app, "on_delete_item")(resource, original)
    getattr(app, "on_delete_item_%s" % resource)(original)

    # media cleanup
    media_fields = app.config['DOMAIN'][resource]['_media']

    # document might miss one or more media fields because of datasource and/or
    # client projection.
    missing_media_fields = [f for f in media_fields if f not in original]
    if missing_media_fields:
        # retrieve the whole document so we have all media fields available.
        # Should be a very rare occurrence. We can't get rid of the
        # get_document() call since it also deals with etag matching, which is
        # still needed. Also, this lookup should never fail.
        # TODO not happy with this hack. Not at all. Is there a better way?
        original = app.data.find_one_raw(resource, original[config.ID_FIELD])

    for field in media_fields:
        if field in original:
            app.media.delete(original[field])

    id = original[config.ID_FIELD]
    app.data.remove(resource, {config.ID_FIELD: id})

    # update oplog if needed
    oplog_push(resource, original, 'DELETE', id)

    # TODO: should attempt to delete version collection even if setting is off
    if app.config['DOMAIN'][resource]['versioning'] is True:
        app.data.remove(
            resource + config.VERSIONS,
            {versioned_id_field(): original[config.ID_FIELD]})

    getattr(app, "on_deleted_item")(resource, original)
    getattr(app, "on_deleted_item_%s" % resource)(original)

    return {}, None, None, 204
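
The version-collection cleanup at the end mirrors a simple pattern: versioned documents live in a sibling collection and point back to their parent through the versioned id field. A sketch under the assumption of Eve's default '_versions' suffix and '_id_document' field:

def remove_versions(data, resource, parent_id,
                    versions_suffix='_versions',
                    version_id_field='_id_document'):
    # Delete every stored version that references the removed parent.
    # `data` stands in for app.data; the suffix and field name are
    # Eve's defaults and are assumptions here.
    data.remove(resource + versions_suffix, {version_id_field: parent_id})
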
Example #28
    def remove_expired(self, doc):
        """
        Removes the expired published article from 'published' collection. Below is the workflow:
            1.  If doc is a package then recursively move the items in the package to legal archive if the item wasn't
                moved before. And then run the package through the expiry workflow.
            2.  Check if doc has expired. This is needed because when doc is a package and expired but the items in the
                package are not expired. If expired then update allow_post_publish_actions, can_be_removed flags.
            3.  Insert/update the doc in Legal Archive repository
                (a) All references to master data like users, desks ... are de-normalized before inserting into
                    Legal Archive. Same is done to each version of the article.
                (b) Inserts Transmission Details (fetched from publish_queue collection)
            4.  If the doc has expired then remove the transmission details from Publish Queue collection.
            5.  If the doc has expired  and is eligible to be removed from production then remove the article and
                its versions from archive and archive_versions collections respectively.
            6.  Removes the item from published collection, if can_be_removed is True

        :param doc: doc in 'published' collection
        """

        log_msg_format = "{{'_id': {item_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                         "'expired_on': {expiry}}}."
        log_msg = log_msg_format.format(**doc)

        version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
        can_be_removed = doc['can_be_removed']

        if not can_be_removed:
            if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:  # Step 1
                logging.info(
                    'Starting the workflow for removal of the expired package '
                    + log_msg)
                self._handle_expired_package(doc)

            logging.info(
                'Starting the workflow for removal of the expired item ' +
                log_msg)
            is_expired = doc['expiry'] <= utcnow()

            if is_expired:  # Step 2
                updates = self._update_flags(doc, log_msg)
                doc.update(updates)
                can_be_removed = updates.get('can_be_removed', can_be_removed)

            # Step 3
            # publish_queue_items = self._upsert_into_legal_archive(doc, version_id_field, log_msg_format, log_msg)
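            # NOTE: the legal-archive upsert above is commented out in this
            # variant, so publish_queue_items stays empty and the Step 4 loop
            # below becomes a no-op.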
            publish_queue_items = []
            if is_expired:  # Step 4
                logging.info(
                    'Removing the transmission details for expired item ' +
                    log_msg)
                for publish_queue_item in publish_queue_items:
                    get_resource_service('publish_queue').delete_action(
                        lookup={
                            config.ID_FIELD: publish_queue_item[
                                config.ID_FIELD]
                        })

            if is_expired and self.can_remove_from_production(doc):  # Step 5
                logging.info('Removing the expired item from production ' +
                             log_msg)
                lookup = {
                    '$and': [{
                        version_id_field: doc['item_id']
                    }, {
                        config.VERSION: {
                            '$lte': doc[config.VERSION]
                        }
                    }]
                }
                get_resource_service('archive_versions').delete(lookup)

                get_resource_service(ARCHIVE).delete_action(
                    {config.ID_FIELD: doc['item_id']})

        if can_be_removed:  # Step 6
            logging.info(
                'Removing the expired item from published collection ' +
                log_msg)
            self.delete_by_article_id(_id=doc['item_id'], doc=doc)

        logging.info(
            'Completed the workflow for removing the expired publish item ' +
            log_msg)
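
The Step 5 lookup above is a plain MongoDB-style $and/$lte filter over the versions collection. Its shape, with assumed Superdesk defaults '_id_document' and '_current_version' substituted for version_id_field and config.VERSION:

doc = {'item_id': 'abc123', '_current_version': 4}  # illustrative values
lookup = {'$and': [{'_id_document': doc['item_id']},
                   {'_current_version': {'$lte': doc['_current_version']}}]}
# Matches every stored version of the item up to and including the
# expired document's version, which is exactly what gets deleted.
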
Example #29
def deleteitem_internal(resource,
                        concurrency_check=False,
                        suppress_callbacks=False,
                        original=None,
                        **lookup):
    """ Intended for internal delete calls, this method is not rate limited,
    authentication is not checked, pre-request events are not raised, and
    concurrency checking is optional. Deletes a resource item.

    :param resource: name of the resource to which the item(s) belong.
    :param concurrency_check: concurrency check switch (bool)
    :param original: original document if already fetched from the database
    :param **lookup: item lookup query.

    .. versionchanged:: 0.6
       Support for soft delete.

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.
       Push updates to OpLog.
       Original deleteitem() has been split into deleteitem() and
       deleteitem_internal().

    .. versionchanged:: 0.4
       Fix #284: If you have a media field, and set datasource projection to
       0 for that field, the media will not be deleted.
       Support for document versioning.
       'on_delete_item' events raised before performing the delete.
       'on_deleted_item' events raised after performing the delete.

    .. versionchanged:: 0.3
       Delete media files as needed.
       Pass the explicit query filter to the data driver, as it does not
       support the id argument anymore.

    .. versionchanged:: 0.2
       Raise pre_<method> event.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.5
      Pass current resource to ``parse_request``, allowing for proper
      processing of new configuration settings: `filters`, `sorting`, `paging`.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.
    """
    resource_def = config.DOMAIN[resource]
    soft_delete_enabled = resource_def["soft_delete"]
    original = get_document(resource,
                            concurrency_check,
                            original,
                            force_auth_field_projection=soft_delete_enabled,
                            **lookup)
    if not original or (soft_delete_enabled
                        and original.get(config.DELETED) is True):
        return all_done()

    # notify callbacks
    if not suppress_callbacks:
        getattr(app, "on_delete_item")(resource, original)
        getattr(app, "on_delete_item_%s" % resource)(original)

    if soft_delete_enabled:
        # Instead of removing the document from the db, just mark it as deleted
        marked_document = copy.deepcopy(original)

        # Set DELETED flag and update metadata
        last_modified = datetime.utcnow().replace(microsecond=0)
        marked_document[config.DELETED] = True
        marked_document[config.LAST_UPDATED] = last_modified

        if config.IF_MATCH:
            resolve_document_etag(marked_document, resource)

        resolve_document_version(marked_document, resource, "DELETE", original)

        # Update document in database (including version collection if needed)
        id = original[resource_def["id_field"]]
        try:
            app.data.replace(resource, id, marked_document, original)
        except app.data.OriginalChangedError:
            if concurrency_check:
                abort(412, description="Client and server etags don't match")

        # create previous version if it wasn't already there
        late_versioning_catch(original, resource)
        # and add deleted version
        insert_versioning_documents(resource, marked_document)
        # update oplog if needed
        oplog_push(resource, marked_document, "DELETE", id)

    else:
        # Delete the document for real

        # media cleanup
        media_fields = app.config["DOMAIN"][resource]["_media"]

        # document might miss one or more media fields because of datasource
        # and/or client projection.
        missing_media_fields = [f for f in media_fields if f not in original]
        if missing_media_fields:
            # retrieve the whole document so we have all media fields available
            # Should be a very rare occurrence. We can't get rid of the
            # get_document() call since it also deals with etag matching, which
            # is still needed. Also, this lookup should never fail.
            # TODO not happy with this hack. Not at all. Is there a better way?
            original = app.data.find_one_raw(resource, **lookup)

        for field in media_fields:
            if field in original:
                media_field = original[field]
                if isinstance(media_field, list):
                    for file_id in media_field:
                        app.media.delete(file_id, resource)
                else:
                    app.media.delete(original[field], resource)

        id = original[resource_def["id_field"]]
        app.data.remove(resource, lookup)

        # TODO: should attempt to delete version collection even if setting is
        # off
        if app.config["DOMAIN"][resource]["versioning"] is True:
            app.data.remove(
                resource + config.VERSIONS,
                {
                    versioned_id_field(resource_def):
                    original[resource_def["id_field"]]
                },
            )

        # update oplog if needed
        oplog_push(resource, original, "DELETE", id)

    if not suppress_callbacks:
        getattr(app, "on_deleted_item")(resource, original)
        getattr(app, "on_deleted_item_%s" % resource)(original)

    return all_done()
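
Example #29's media cleanup differs from the older variants by accepting list-valued media fields. The loop reduces to the following sketch (`media_store.delete` mirrors `app.media.delete(file_id, resource)`):

def delete_media(media_store, original, media_fields, resource):
    # A media field may hold a single file id or a list of them; treat
    # both uniformly and delete each referenced file.
    for field in media_fields:
        value = original.get(field)
        if value is None:
            continue
        for file_id in value if isinstance(value, list) else [value]:
            media_store.delete(file_id, resource)
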
Example #30
    def upsert_into_legal_archive(self, doc):
        """
        Once publish actions are performed on the article do the below:
            1.  Get legal archive article.
            2.  De-normalize the expired article
            3.  Upserting Legal Archive.
            4.  Get Version History and De-normalize and Inserting Legal Archive Versions
        :param dict doc: doc from 'archive' collection.
        """

        if doc.get(ITEM_STATE) not in {CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED}:
            logger.error('Invalid state: {}. Cannot move the item to legal archive. item: {}'.
                         format(doc.get(ITEM_STATE), self.log_msg_format.format(**doc)))
            return

        # required for behave test.
        legal_archive_doc = deepcopy(doc)

        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)

        log_msg = self.log_msg_format.format(**legal_archive_doc)
        version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
        logger.info('Preparing Article to be inserted into Legal Archive ' + log_msg)

        # Removing Irrelevant properties
        legal_archive_doc.pop(config.ETAG, None)
        legal_archive_doc.pop('lock_user', None)
        legal_archive_doc.pop('lock_session', None)
        legal_archive_doc.pop('lock_time', None)

        logger.info('Removed irrelevant properties from the article ' + log_msg)

        # Step 1
        article_in_legal_archive = legal_archive_service.find_one(req=None, _id=legal_archive_doc[config.ID_FIELD])

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc, log_msg)
        logger.info('De-normalized article ' + log_msg)

        # Step 3 - Upserting Legal Archive
        logger.info('Upserting Legal Archive Repo with article ' + log_msg)

        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 4 - Get Version History and De-normalize and Inserting Legal Archive Versions
        lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
        version_history = list(get_resource_service('archive_versions').get(req=None, lookup=lookup))
        legal_version_history = list(legal_archive_versions_service.get(req=None, lookup=lookup))

        logger.info('Fetched version history for article ' + log_msg)
        versions_to_insert = [version for version in version_history
                              if not any(legal_version for legal_version in legal_version_history
                                         if version[config.VERSION] == legal_version[config.VERSION])]

        for version_doc in versions_to_insert:
            self._denormalize_user_desk(version_doc,
                                        self.log_msg_format.format(_id=version_doc[version_id_field],
                                                                   unique_name=version_doc.get('unique_name'),
                                                                   _current_version=version_doc[config.VERSION],
                                                                   expiry=version_doc.get('expiry')))
            version_doc.pop(config.ETAG, None)

        if versions_to_insert:
            legal_archive_versions_service.post(versions_to_insert)
            logger.info('Inserted de-normalized version history for article ' + log_msg)

        logger.info('Upsert completed for article ' + log_msg)
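
The `versions_to_insert` filter above is an O(n*m) scan; semantically it is a set difference on version numbers. An equivalent sketch using a set, with '_current_version' assumed as the version field:

def missing_versions(version_history, legal_version_history,
                     version_field='_current_version'):
    # Keep only versions whose number is not already in the legal archive.
    existing = {legal[version_field] for legal in legal_version_history}
    return [v for v in version_history if v[version_field] not in existing]

history = [{'_current_version': n} for n in (1, 2, 3)]
legal = [{'_current_version': n} for n in (1, 2)]
assert missing_versions(history, legal) == [{'_current_version': 3}]
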
Example #31
    def __init_article_versions(self):
        return [{'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 versioned_id_field(): '8',
                 'type': 'text',
                 config.VERSION: 1,
                 'urgency': 4,
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'dateline': 'Sydney',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 'state': 'draft',
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 versioned_id_field(): '8',
                 'type': 'text',
                 config.VERSION: 2,
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'dateline': 'Sydney',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 'state': 'submitted',
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 versioned_id_field(): '8',
                 'type': 'text',
                 config.VERSION: 3,
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article',
                 'dateline': 'Sydney',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 'state': 'in_progress',
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 versioned_id_field(): '8',
                 'type': 'text',
                 config.VERSION: 4,
                 'body_html': 'Test body',
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article',
                 'dateline': 'Sydney',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                             {'qcode': '04001002', 'name': 'Weather'}],
                 'state': 'in_progress',
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#8'}]
Example #32
    def upsert_into_legal_archive(self, item_id):
        """Once publish actions are performed on the article do the below:

            1.  Get legal archive article.
            2.  De-normalize the expired article
            3.  Upserting Legal Archive.
            4.  Get Version History and De-normalize and Inserting Legal Archive Versions
            5.  Get History and de-normalize and insert into Legal Archive History

        :param item_id: id of the document from the 'archive' collection.
        """
        try:

            logger.info("Import item into legal {}.".format(item_id))

            doc = get_resource_service(ARCHIVE).find_one(req=None, _id=item_id)

            if not doc:
                logger.error(
                    "Could not find the document {} to import to legal archive."
                    .format(item_id))
                return

            # setting default values in case they are missing; otherwise the log message will fail.
            doc.setdefault("unique_name", "NO UNIQUE NAME")
            doc.setdefault(config.VERSION, 1)
            doc.setdefault("expiry", utcnow())

            if doc.get(ITEM_STATE) not in PUBLISH_STATES:
                # at times we have seen that the item is published but its copy in the archive
                # collection differs; notify admins about the issue but proceed to move the
                # item into the legal archive.
                msg = "Invalid state: {}. Moving the item to legal archive. item: {}".format(
                    doc.get(ITEM_STATE), self.log_msg_format.format(**doc))
                logger.error(msg)
                update_notifiers(ACTIVITY_ERROR, msg=msg, resource=ARCHIVE)

            # required for behave test.
            legal_archive_doc = deepcopy(doc)
            legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
            legal_archive_versions_service = get_resource_service(
                LEGAL_ARCHIVE_VERSIONS_NAME)
            legal_archive_history_service = get_resource_service(
                LEGAL_ARCHIVE_HISTORY_NAME)

            log_msg = self.log_msg_format.format(**legal_archive_doc)
            version_id_field = versioned_id_field(
                app.config["DOMAIN"][ARCHIVE])
            logger.info(
                "Preparing Article to be inserted into Legal Archive " +
                log_msg)

            # Removing irrelevant properties
            legal_archive_doc.pop(config.ETAG, None)
            legal_archive_doc.pop("lock_user", None)
            legal_archive_doc.pop("lock_session", None)
            legal_archive_doc.pop("lock_time", None)
            legal_archive_doc.pop("lock_action", None)

            logger.info(
                "Removed irrelevant properties from the article {}".format(
                    log_msg))

            # Step 1
            article_in_legal_archive = legal_archive_service.find_one(
                req=None, _id=legal_archive_doc[config.ID_FIELD])

            if article_in_legal_archive and article_in_legal_archive.get(
                    config.VERSION, 0) > legal_archive_doc.get(config.VERSION):
                logger.info(
                    "Item {} version: {} already in legal archive. Legal Archive document version {}"
                    .format(
                        legal_archive_doc.get(config.ID_FIELD),
                        legal_archive_doc.get(config.VERSION),
                        article_in_legal_archive.get(config.VERSION),
                    ))
                self._set_moved_to_legal(doc)
                return

            # Step 2 - De-normalizing the legal archive doc
            self._denormalize_user_desk(legal_archive_doc, log_msg)
            logger.info("De-normalized article {}".format(log_msg))

            # Step 3 - Upserting Legal Archive
            logger.info(
                "Upserting Legal Archive Repo with article {}".format(log_msg))

            if article_in_legal_archive:
                legal_archive_service.put(legal_archive_doc[config.ID_FIELD],
                                          legal_archive_doc)
            else:
                legal_archive_service.post([legal_archive_doc])

            # Step 4 - Get Versions and De-normalize and Inserting Legal Archive Versions
            lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
            versions = list(
                get_resource_service("archive_versions").get(req=None,
                                                             lookup=lookup))
            legal_versions = list(
                legal_archive_versions_service.get(req=None, lookup=lookup))

            logger.info(
                "Fetched version history for article {}".format(log_msg))
            versions_to_insert = [
                version for version in versions
                if not any(legal_version for legal_version in legal_versions
                           if version[config.VERSION] == legal_version[
                               config.VERSION])
            ]

            # Step 5 - Get History and de-normalize and insert into Legal Archive History
            lookup = {"item_id": legal_archive_doc[config.ID_FIELD]}
            history_items = list(
                get_resource_service("archive_history").get(req=None,
                                                            lookup=lookup))
            legal_history_items = list(
                legal_archive_history_service.get(req=None, lookup=lookup))

            logger.info("Fetched history for article {}".format(log_msg))
            history_to_insert = [
                history for history in history_items
                if not any(legal_version
                           for legal_version in legal_history_items
                           if history[config.ID_FIELD] == legal_version[
                               config.ID_FIELD])
            ]

            # This happens when user kills an article from Dusty Archive
            if (article_in_legal_archive
                    and article_in_legal_archive[config.VERSION] <
                    legal_archive_doc[config.VERSION]
                    and len(versions_to_insert) == 0):

                resource_def = app.config["DOMAIN"][ARCHIVE]
                versioned_doc = deepcopy(legal_archive_doc)
                versioned_doc[versioned_id_field(
                    resource_def)] = legal_archive_doc[config.ID_FIELD]
                versioned_doc[config.ID_FIELD] = ObjectId()
                versions_to_insert.append(versioned_doc)

            for version_doc in versions_to_insert:
                self._denormalize_user_desk(
                    version_doc,
                    self.log_msg_format.format(
                        _id=version_doc[version_id_field],
                        unique_name=version_doc.get("unique_name"),
                        _current_version=version_doc[config.VERSION],
                        expiry=version_doc.get("expiry"),
                    ),
                )
                version_doc.pop(config.ETAG, None)

            if versions_to_insert:
                legal_archive_versions_service.post(versions_to_insert)
                logger.info(
                    "Inserted de-normalized versions for article {}".format(
                        log_msg))

            for history_doc in history_to_insert:
                self._denormalize_history(history_doc)
                history_doc.pop(config.ETAG, None)

            if history_to_insert:
                legal_archive_history_service.post(history_to_insert)
                logger.info(
                    "Inserted de-normalized history for article {}".format(
                        log_msg))

            # Set the flag that item is moved to legal.
            self._set_moved_to_legal(doc)

            logger.info("Upsert completed for article " + log_msg)
        except Exception:
            logger.exception(
                "Failed to import into legal archive {}.".format(item_id))
            raise
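
The "Dusty Archive" fallback above synthesizes a version document when the legal archive lags the incoming article but no stored versions are missing. A sketch of that construction (field names mirror the snippet; bson's ObjectId supplies the fresh _id):

from copy import deepcopy
from bson import ObjectId

def synthesize_version(doc, version_id_field='_id_document', id_field='_id'):
    # Point the synthetic version at its parent and give it a new id so
    # it can be inserted alongside genuine version documents.
    versioned = deepcopy(doc)
    versioned[version_id_field] = doc[id_field]
    versioned[id_field] = ObjectId()
    return versioned
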
Example #33
    def upsert_into_legal_archive(self, doc):
        """
        Once publish actions are performed on the article do the below:
            1.  Get legal archive article.
            2.  De-normalize the expired article
            3.  Upserting Legal Archive.
            4.  Get Version History and De-normalize and Inserting Legal Archive Versions
        :param dict doc: doc from 'archive' collection.
        """

        if doc.get(ITEM_STATE) not in {
                CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                CONTENT_STATE.KILLED
        }:
            logger.error(
                'Invalid state: {}. Cannot move the item to legal archive. item: {}'
                .format(doc.get(ITEM_STATE),
                        self.log_msg_format.format(**doc)))
            return

        # required for behave test.
        legal_archive_doc = deepcopy(doc)

        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(
            LEGAL_ARCHIVE_VERSIONS_NAME)

        log_msg = self.log_msg_format.format(**legal_archive_doc)
        version_id_field = versioned_id_field(app.config['DOMAIN'][ARCHIVE])
        logger.info('Preparing Article to be inserted into Legal Archive ' +
                    log_msg)

        # Removing Irrelevant properties
        legal_archive_doc.pop(config.ETAG, None)
        legal_archive_doc.pop('lock_user', None)
        legal_archive_doc.pop('lock_session', None)
        legal_archive_doc.pop('lock_time', None)

        logger.info('Removed irrelevant properties from the article {}'.format(
            log_msg))

        # Step 1
        article_in_legal_archive = legal_archive_service.find_one(
            req=None, _id=legal_archive_doc[config.ID_FIELD])

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc, log_msg)
        logger.info('De-normalized article {}'.format(log_msg))

        # Step 3 - Upserting Legal Archive
        logger.info(
            'Upserting Legal Archive Repo with article {}'.format(log_msg))

        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD],
                                      legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 4 - Get Version History and De-normalize and Inserting Legal Archive Versions
        lookup = {version_id_field: legal_archive_doc[config.ID_FIELD]}
        version_history = list(
            get_resource_service('archive_versions').get(req=None,
                                                         lookup=lookup))
        legal_version_history = list(
            legal_archive_versions_service.get(req=None, lookup=lookup))

        logger.info('Fetched version history for article {}'.format(log_msg))
        versions_to_insert = [
            version for version in version_history if not any(
                legal_version for legal_version in legal_version_history
                if version[config.VERSION] == legal_version[config.VERSION])
        ]

        # This happens when user kills an article from Dusty Archive
        if article_in_legal_archive and article_in_legal_archive[config.VERSION] < legal_archive_doc[config.VERSION] \
                and len(versions_to_insert) == 0:
            resource_def = app.config['DOMAIN'][ARCHIVE]
            versioned_doc = deepcopy(legal_archive_doc)
            versioned_doc[versioned_id_field(
                resource_def)] = legal_archive_doc[config.ID_FIELD]
            versioned_doc[config.ID_FIELD] = ObjectId()
            versions_to_insert.append(versioned_doc)

        for version_doc in versions_to_insert:
            self._denormalize_user_desk(
                version_doc,
                self.log_msg_format.format(
                    _id=version_doc[version_id_field],
                    unique_name=version_doc.get('unique_name'),
                    _current_version=version_doc[config.VERSION],
                    expiry=version_doc.get('expiry')))
            version_doc.pop(config.ETAG, None)

        if versions_to_insert:
            legal_archive_versions_service.post(versions_to_insert)
            logger.info(
                'Inserted de-normalized version history for article {}'.format(
                    log_msg))

        logger.info('Upsert completed for article ' + log_msg)
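
A note on `self.log_msg_format`, used throughout these examples: it is a plain `str.format` template over article fields, with doubled braces escaping the literal ones. A hedged reconstruction based on the format string shown in example #28 (the field names here are assumptions):

log_msg_format = ("{{'_id': {_id}, 'unique_name': {unique_name}, "
                  "'version': {_current_version}, 'expired_on': {expiry}}}.")
doc = {'_id': 'abc123', 'unique_name': '#8',
       '_current_version': 3, 'expiry': None}
# Doubled braces render as literal braces, so this prints:
# {'_id': abc123, 'unique_name': #8, 'version': 3, 'expired_on': None}.
print(log_msg_format.format(**doc))
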
Example #34
    def test_remove_expired_published_and_killed_content(self):
        cmd = ValidatorsPopulateCommand()

        with self.app.app_context():
            cmd.run(self.filename)
            self.app.data.insert('archive_versions', self.article_versions)

            published_service = get_resource_service('published')
            text_archive = get_resource_service('text_archive')

            # Publishing an Article
            doc = self.articles[0]
            original = doc.copy()
            get_resource_service('archive_publish').queue_transmission(original)
            published_service.post([original])

            published_items = published_service.get_other_published_items(original['item_id'])
            self.assertEquals(1, published_items.count())

            # Setting the expiry date of the published article to 1 hr back from now
            published_service.update_published_items(original['item_id'], 'expiry', utcnow() + timedelta(minutes=-60))

            # Killing the published article and manually inserting the version of the article as unittests use
            # service directly
            _current_version = doc[config.VERSION] + 1
            get_resource_service('archive_kill').patch(id=doc['_id'],
                                                       updates={config.VERSION: _current_version})
            killed_version = {
                'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                versioned_id_field(): '1',
                'type': 'text',
                config.VERSION: _current_version,
                'body_html': 'Test body',
                'destination_groups': ['4'],
                'urgency': 4,
                'headline': 'Two students missing',
                'pubstatus': 'usable',
                'firstcreated': utcnow(),
                'byline': 'By Alan Karben',
                'ednote': 'Andrew Marwood contributed to this article',
                'dateline': 'Sydney',
                'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                'subject': [{'qcode': '17004000', 'name': 'Statistics'}, {'qcode': '04001002', 'name': 'Weather'}],
                'state': 'published',
                'expiry': utcnow() + timedelta(minutes=20),
                'unique_name': '#2'
            }
            self.app.data.insert('archive_versions', [killed_version])

            # Executing the Expiry Job for the Published Article and asserting the collections
            RemoveExpiredPublishContent().run()

            articles_in_text_archive = text_archive.get(req=None, lookup={'item_id': original['item_id']})
            self.assertEquals(articles_in_text_archive.count(), 0)

            published_items = published_service.get_other_published_items(str(original['item_id']))
            self.assertEquals(1, published_items.count())

            article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original['item_id'])
            self.assertIsNotNone(article_in_production)
            self.assertEquals(article_in_production['state'], 'killed')
            self.assertEquals(article_in_production[config.VERSION], _current_version)

            # Validate the collections in Legal Archive
            article_in_legal_archive, article_versions_in_legal_archive, formatted_items, queue_items = \
                self.__get_legal_archive_details(original['item_id'])

            self.assertIsNotNone(article_in_legal_archive, 'Article cannot be none in Legal Archive')
            self.assertEquals(article_in_legal_archive['state'], 'published')

            self.assertIsNotNone(article_versions_in_legal_archive, 'Article Versions cannot be none in Legal Archive')
            self.assertEquals(article_versions_in_legal_archive.count(), 4)

            self.assertGreaterEqual(formatted_items.count(), 1, 'Formatted Items must be greater than or equal to 1')
            for formatted_item in formatted_items:
                self.assertEquals(formatted_item['item_id'], original['item_id'])
                self.assertEquals(formatted_item['item_version'], self.articles[0][config.VERSION])

            self.assertGreaterEqual(queue_items.count(), 1, 'Publish Queue Items must be greater than or equal to 1')

            # Setting the expiry date of the killed article to 1 hr back from now and running the job again
            published_service.update_published_items(original['item_id'], 'expiry', utcnow() + timedelta(minutes=-60))
            RemoveExpiredPublishContent().run()

            articles_in_text_archive = text_archive.get(req=None, lookup={'item_id': original['item_id']})
            self.assertEquals(articles_in_text_archive.count(), 0)

            published_items = published_service.get_other_published_items(str(original['item_id']))
            self.assertEquals(0, published_items.count())

            article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original['item_id'])
            self.assertIsNone(article_in_production)

            # Validate the collections in Legal Archive
            article_in_legal_archive, article_versions_in_legal_archive, formatted_items, queue_items = \
                self.__get_legal_archive_details(original['item_id'], article_version=_current_version,
                                                 publishing_action='killed')

            self.assertIsNotNone(article_in_legal_archive, 'Article cannot be none in Legal Archive')
            self.assertEquals(article_in_legal_archive['state'], 'killed')

            self.assertIsNotNone(article_versions_in_legal_archive, 'Article Versions cannot be none in Legal Archive')
            self.assertEquals(article_versions_in_legal_archive.count(), 5)

            self.assertGreaterEqual(formatted_items.count(), 1, 'Formatted Items must be greater than or equal to 1')
            for formatted_item in formatted_items:
                self.assertEquals(formatted_item['item_id'], original['item_id'])
                self.assertEquals(formatted_item['item_version'], _current_version)

            self.assertGreaterEqual(queue_items.count(), 1, 'Publish Queue Items must be greater than or equal to 1')
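
The expiry manipulation in this test (`utcnow() + timedelta(minutes=-60)`) back-dates the item so the job's expiry predicate fires. That predicate is a single comparison, sketched here with naive datetimes for self-containment:

from datetime import datetime, timedelta

def is_expired(doc, now=None):
    # An item is expired when its expiry timestamp is not in the future.
    now = now or datetime.utcnow()
    return doc['expiry'] <= now

assert is_expired({'expiry': datetime.utcnow() - timedelta(minutes=60)})
assert not is_expired({'expiry': datetime.utcnow() + timedelta(minutes=20)})
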
Example #35
def getitem_internal(resource, **lookup):
    """
    :param resource: the name of the resource to which the document belongs.
    :param **lookup: the lookup query.

    .. versionchanged:: 0.6
       Handle soft deleted documents

    .. versionchanged:: 0.5
       Allow ``?version=all`` requests to fire ``on_fetched_*`` events.
       Create pagination links for document versions. (#475)
       Pagination links reflect current query. (#464)

    .. versionchanged:: 0.4
       HATEOAS links contain the business unit value even when
       regexes have been configured for the resource endpoint.
       'on_fetched' now returns the whole response (HATEOAS metafields
       included.)
       Support for document versioning.
       Changed ``on_fetch_*`` changed to ``on_fetched_*``.

    .. versionchanged:: 0.3
       Support for media fields.
       When IF_MATCH is disabled, no etag is included in the payload.

    .. versionchanged:: 0.1.1
       Support for Embedded Resource Serialization.

    .. versionchanged:: 0.1.0
       Support for optional HATEOAS.

    .. versionchanged:: 0.0.8
       'on_getting_item' event is raised when a document has been read from the
       database and is about to be sent to the client.

    .. versionchanged:: 0.0.7
       Support for Rate-Limiting.

    .. versionchanged:: 0.0.6
       Support for HEAD requests.

    .. versionchanged:: 0.0.6
        ETag added to payload.

    .. versionchanged:: 0.0.5
       Support for user-restricted access to resources.
       Support for LAST_UPDATED field missing from documents, because they were
       created outside the API context.

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionchanged:: 0.0.3
       Superfluous ``response`` container removed. Links wrapped with
       ``_links``. Links are now properly JSON formatted.
    """
    req = parse_request(resource)
    resource_def = config.DOMAIN[resource]
    embedded_fields = resolve_embedded_fields(resource, req)

    soft_delete_enabled = config.DOMAIN[resource]['soft_delete']
    if soft_delete_enabled:
        # GET requests should always fetch soft deleted documents from the db
        # They are handled and included in 404 responses below.
        req.show_deleted = True

    document = app.data.find_one(resource, req, **lookup)
    if not document:
        abort(404)

    response = {}
    etag = None
    version = request.args.get(config.VERSION_PARAM)
    latest_doc = None
    cursor = None

    # calculate last_modified before get_old_document rolls back the document,
    # allowing us to invalidate the cache when _latest_version changes
    last_modified = last_updated(document)

    # synthesize old document version(s)
    if resource_def['versioning'] is True:
        latest_doc = document
        document = get_old_document(
            resource, req, lookup, document, version)

    # meld into response document
    build_response_document(document, resource, embedded_fields, latest_doc)
    if config.IF_MATCH:
        etag = document[config.ETAG]
        if resource_def['versioning'] is True:
            # In order to keep the LATEST_VERSION field up to date in client
            # caches, changes to the latest version should invalidate cached
            # copies of previous versions. Incorporate the latest version into
            # versioned document ETags on the fly to ensure 'If-None-Match'
            # comparisons support this caching behavior.
            etag += str(document[config.LATEST_VERSION])
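            # Illustrative values: a cached copy of version 3 with etag 'abc'
            # while the latest version was 7 carries validator 'abc7'; once
            # version 8 is published the comparison value becomes 'abc8', so
            # the stale copy fails If-None-Match and is re-fetched.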

    # check embedded fields resolved in build_response_document() for more
    # recent last updated timestamps. We don't want to respond 304 if embedded
    # fields have changed
    for field in embedded_fields:
        embedded_document = document.get(field)
        if isinstance(embedded_document, dict):
            embedded_last_updated = last_updated(embedded_document)
            if embedded_last_updated > last_modified:
                last_modified = embedded_last_updated

    # facilitate client caching by returning a 304 when appropriate
    cache_validators = {True: 0, False: 0}
    if req.if_modified_since:
        cache_valid = (last_modified <= req.if_modified_since)
        cache_validators[cache_valid] += 1
    if req.if_none_match:
        cache_valid = (etag == req.if_none_match)
        cache_validators[cache_valid] += 1
    # If all cache validators are true, return 304
    if (cache_validators[True] > 0) and (cache_validators[False] == 0):
        return {}, last_modified, etag, 304
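
    # Worked example of the validator rule above (illustrative): a request
    # carrying a fresh If-Modified-Since but a stale If-None-Match yields
    # cache_validators == {True: 1, False: 1}; because one validator failed,
    # the full document is returned rather than a 304.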

    if version == 'all' or version == 'diffs':
        # find all versions
        lookup[versioned_id_field(resource_def)] \
            = lookup[resource_def['id_field']]
        del lookup[resource_def['id_field']]
        if version == 'diffs' or req.sort is None:
            # default sort for 'all', required sort for 'diffs'
            req.sort = '[("%s", 1)]' % config.VERSION
        req.if_modified_since = None  # we always want the full history here
        cursor = app.data.find(resource + config.VERSIONS, req, lookup)

        # build all versions
        documents = []
        if cursor.count() == 0:
            # this is the scenario when the document existed before
            # document versioning got turned on
            documents.append(latest_doc)
        else:
            last_document = {}

            # if we aren't starting on page 1, then we need to init last_doc
            if version == 'diffs' and req.page > 1:
                # grab the last document on the previous page to diff from
                last_version = cursor[0][app.config['VERSION']] - 1
                last_document = get_old_document(
                    resource, req, lookup, latest_doc, last_version)

            for i, document in enumerate(cursor):
                document = synthesize_versioned_document(
                    latest_doc, document, resource_def)
                build_response_document(
                    document, resource, embedded_fields, latest_doc)
                if version == 'diffs':
                    if i == 0:
                        documents.append(document)
                    else:
                        documents.append(diff_document(
                            resource_def, last_document, document))
                    last_document = document
                else:
                    documents.append(document)

        # add documents to response
        if config.DOMAIN[resource]['hateoas']:
            response[config.ITEMS] = documents
        else:
            response = documents
    elif soft_delete_enabled and document.get(config.DELETED) is True:
        # This document was soft deleted. Respond with 404 and the deleted
        # version of the document.
        document[config.STATUS] = config.STATUS_ERR
        document[config.ERROR] = {
            'code': 404,
            'message': 'The requested URL was not found on this server.'
        }
        return document, last_modified, etag, 404
    else:
        response = document

    # extra hateoas links
    if config.DOMAIN[resource]['hateoas']:
        # use the id of the latest document for multi-document requests
        if cursor:
            count = cursor.count(with_limit_and_skip=False)
            response[config.LINKS] = \
                _pagination_links(resource, req, count,
                                  latest_doc[resource_def['id_field']])
            if config.DOMAIN[resource]['pagination']:
                response[config.META] = _meta_links(req, count)
        else:
            response[config.LINKS] = \
                _pagination_links(resource, req, None,
                                  response[resource_def['id_field']])

    # callbacks not supported on version diffs because of partial documents
    if version != 'diffs':
        # notify registered callback functions. Please note that, should
        # the functions modify the document, last_modified and etag
        # won't be updated to reflect the changes (they always reflect the
        # documents state on the database).
        if resource_def['versioning'] is True and version == 'all':
            versions = response
            if config.DOMAIN[resource]['hateoas']:
                versions = response[config.ITEMS]
            for version_item in versions:
                getattr(app, "on_fetched_item")(resource, version_item)
                getattr(app, "on_fetched_item_%s" % resource)(version_item)
        else:
            getattr(app, "on_fetched_item")(resource, response)
            getattr(app, "on_fetched_item_%s" % resource)(response)

    return response, last_modified, etag, 200
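
A minimal client-side sketch of the behaviour implemented above, assuming a
running Eve instance serving a hypothetical 'contacts' resource with IF_MATCH
and versioning enabled (the base URL, resource name and document id are all
illustrative) and using the third-party requests library:

    import requests

    DOC_URL = 'http://localhost:5000/contacts/some_id'  # hypothetical endpoint

    # First fetch: Eve returns the document along with an ETag header.
    first = requests.get(DOC_URL)
    etag = first.headers.get('ETag')

    # Conditional re-fetch: getitem_internal answers 304 only when every
    # validator supplied by the client passes.
    again = requests.get(DOC_URL, headers={'If-None-Match': etag})
    assert again.status_code in (200, 304)  # 304 unless the document changed

    # Versioned reads: ?version=all returns every synthesized version,
    # ?version=diffs returns the first version plus deltas between versions.
    history = requests.get(DOC_URL, params={'version': 'all'})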
Example #36
    def _upsert_into_legal_archive(self, doc):
        """
        For the expired published article represented by doc, do the following:
            1.  Fetch the version history of the article such that
                version_history_doc[config.VERSION] <= doc[config.VERSION].
            2.  De-normalize the expired article and each version of the article.
            3.  Fetch the Transmission Details such that queued_item['item_version'] == doc[config.VERSION].
            4.  De-normalize the Transmission Details.
            5.  An article can be published more than once before it's removed from the production database, so it's
                important to check whether the article already exists in the Legal Archive DB. If it exists then
                replace the article in the Legal Archive DB, otherwise create it.
            6.  Create the Version History of the article in the Legal Archive DB.
            7.  Create the Transmission Details in the Legal Archive DB.

        :param doc: expired doc from 'published' collection.
        :return: transmission details
        """

        legal_archive_doc = doc.copy()
        logging.info('Preparing Article to be inserted into Legal Archive %s' % legal_archive_doc.get('unique_name'))

        # Removing Irrelevant properties
        legal_archive_doc[config.ID_FIELD] = legal_archive_doc['item_id']
        del legal_archive_doc[config.ETAG]
        del legal_archive_doc['item_id']

        logging.info('Removed Irrelevant properties from the article %s' % legal_archive_doc.get('unique_name'))

        # Step 3 - Fetch Publish Queue Items
        lookup = {'item_id': legal_archive_doc[config.ID_FIELD], 'item_version': legal_archive_doc[config.VERSION]}
        queue_items = list(get_resource_service('publish_queue').get(req=None, lookup=lookup))
        assert len(queue_items) > 0, \
            "Transmission Details are empty for published item %s" % legal_archive_doc[config.ID_FIELD]
        logging.info('Fetched transmission details for article %s' % legal_archive_doc.get('unique_name'))

        # Step 4
        subscriber_ids = list({str(queue_item['subscriber_id']) for queue_item in queue_items})
        query = {'$and': [{config.ID_FIELD: {'$in': subscriber_ids}}]}
        subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))
        subscribers = {str(subscriber[config.ID_FIELD]): subscriber for subscriber in subscribers}

        for queue_item in queue_items:
            del queue_item[config.ETAG]
            queue_item['subscriber_id'] = subscribers[str(queue_item['subscriber_id'])]['name']
        logging.info(
            'De-normalized the Transmission Detail records of article %s' % legal_archive_doc.get('unique_name'))

        # Step 2 - De-normalizing the legal archive doc
        self._denormalize_user_desk(legal_archive_doc)

        # Step 1 - Get Version History
        req = ParsedRequest()
        req.sort = '[("%s", 1)]' % config.VERSION
        resource_def = app.config['DOMAIN']['archive']
        version_id = versioned_id_field(resource_def)
        lookup = {'$and': [{version_id: legal_archive_doc[config.ID_FIELD]},
                           {config.VERSION: {'$lte': legal_archive_doc[config.VERSION]}}]}

        version_history = list(get_resource_service('archive_versions').get(req=req, lookup=lookup))
        legal_archive_doc_versions = []
        for versioned_doc in version_history:
            self._denormalize_user_desk(versioned_doc)
            del versioned_doc[config.ETAG]
            legal_archive_doc_versions.append(versioned_doc)
        logging.info('Fetched version history for article %s' % legal_archive_doc.get('unique_name'))

        legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
        legal_archive_versions_service = get_resource_service(LEGAL_ARCHIVE_VERSIONS_NAME)
        legal_publish_queue_service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)

        # Step 5 - Upserting Legal Archive
        logging.info('Upserting Legal Archive Repo with article %s' % legal_archive_doc.get('unique_name'))

        article_in_legal_archive = legal_archive_service.find_one(_id=legal_archive_doc[config.ID_FIELD],
                                                                  req=ParsedRequest())
        if article_in_legal_archive:
            legal_archive_service.put(legal_archive_doc[config.ID_FIELD], legal_archive_doc)
        else:
            legal_archive_service.post([legal_archive_doc])

        # Step 6
        if legal_archive_doc_versions:
            legal_archive_versions_service.post(legal_archive_doc_versions)
        # Step 7
        legal_publish_queue_service.post(queue_items)

        logging.info('Upsert completed for article %s' % legal_archive_doc.get('unique_name'))

        return queue_items
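
A hedged sketch of how this method might be driven from the expiry workflow,
assuming access to the command/service instance that defines it; the driver
function, its expiry lookup and the import path for utcnow() are illustrative,
not taken from the source:

    import logging

    from superdesk import get_resource_service
    from superdesk.utc import utcnow  # assumed import path

    def archive_expired_published_items(service):
        # Feed every expired doc in the 'published' collection through
        # _upsert_into_legal_archive before it is purged from production.
        lookup = {'expiry': {'$lte': utcnow()}}  # hypothetical expiry filter
        for doc in get_resource_service('published').get(req=None, lookup=lookup):
            # Returns the de-normalized publish_queue items that were copied
            # into the Legal Archive for this article version.
            queue_items = service._upsert_into_legal_archive(doc)
            logging.info('Archived %s with %d transmission records',
                         doc.get('unique_name'), len(queue_items))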