Esempio n. 1
0
    def filter_document(doc):
        """
        Filter document:
        1. Pass the document through ``apply_schema`` (per the original
           author's note: remove fields that should not be there given its
           profile).
        2. Remove falsy (e.g. ``None``) renditions from every association.

        Associations or ``renditions`` entries that are present but set to
        ``None`` are treated as empty instead of raising ``TypeError``.

        :param dict doc: document to filter
        :return: dict filtered document, as returned by ``apply_schema``
        """

        # remove fields that should not be there given its profile.
        doc = apply_schema(doc)

        # ``or {}`` also covers a key that exists with a ``None`` value,
        # which ``dict.get(key, {})`` would hand back unchanged.
        associations = doc.get(ASSOCIATIONS) or {}
        for association in associations.values():
            if not association:
                continue

            renditions = association.get("renditions") or {}
            # Snapshot the keys to drop first: the dict is mutated below.
            empty_keys = [key for key in renditions if not renditions[key]]
            for key in empty_keys:
                del renditions[key]

        return doc
Esempio n. 2
0
    def create(self, docs, **kwargs):
        """Format the article referenced by the first request document.

        Only ``docs[0]`` is read. It must carry ``formatter_name`` and may
        carry ``article_id``.

        :param list docs: request documents
        :param kwargs: accepted but not used by this method
        :return: ``[{'formatted_doc': <output>}]`` when ``article_id`` is
            present in the request document, otherwise ``None``
        :raises: the error built by ``SuperdeskApiError.badRequestError`` when
            the formatter name is missing, no formatter is found, the article
            is not found, or validation/formatting fails (the original
            exception is chained as the cause in the last case)
        """
        service = get_resource_service('archive')
        doc = docs[0]
        formatter_name = doc.get('formatter_name')

        if not formatter_name:
            raise SuperdeskApiError.badRequestError('Formatter name not found')

        formatter = self._get_formatter(formatter_name)

        if not formatter:
            raise SuperdeskApiError.badRequestError('Formatter not found')

        if 'article_id' not in doc:
            # Nothing to format: mirror the implicit ``None`` of the old code.
            return None

        article = service.find_one(req=None, _id=doc['article_id'])

        if not article:
            raise SuperdeskApiError.badRequestError('Article not found!')

        try:
            self._validate(article)
            # Only the first formatted result is used; its first element
            # (the sequence value) is not needed here.
            _, formatted_doc = formatter.format(apply_schema(article), {'_id': '0'}, None)[0]
            # Collapse doubled single quotes into one.
            formatted_doc = formatted_doc.replace('\'\'', '\'')

            # respond only with the formatted output if output_field is configured
            if hasattr(formatter, 'output_field'):
                formatted_doc = json.loads(formatted_doc)
                formatted_doc = formatted_doc.get(formatter.output_field, '').replace('\'\'', '\'')
        except Exception as ex:
            logger.exception(ex)
            # Chain the cause so the original traceback is not lost.
            raise SuperdeskApiError.badRequestError(
                'Error in formatting article: {}'.format(str(ex))) from ex

        return [{'formatted_doc': formatted_doc}]
Esempio n. 3
0
    def create(self, docs, **kwargs):
        """Run the requested formatter over an archive article.

        The request is taken from ``docs[0]``: ``formatter_name`` selects the
        formatter and ``article_id`` (optional) selects the article.

        :param list docs: request documents; only the first one is read
        :param kwargs: accepted but not used by this method
        :return: ``[{'formatted_doc': <output>}]``, or ``None`` when the
            request document has no ``article_id``
        :raises: the error built by ``SuperdeskApiError.badRequestError`` for
            a missing formatter name, unknown formatter, unknown article, or
            any failure while validating/formatting
        """
        archive = get_resource_service('archive')
        request_doc = docs[0]

        name = request_doc.get('formatter_name')
        if not name:
            raise SuperdeskApiError.badRequestError('Formatter name not found')

        formatter = self._get_formatter(name)
        if not formatter:
            raise SuperdeskApiError.badRequestError('Formatter not found')

        if 'article_id' not in request_doc:
            return None

        article = archive.find_one(req=None, _id=request_doc.get('article_id'))
        if not article:
            raise SuperdeskApiError.badRequestError('Article not found!')

        try:
            self._validate(article)
            first_result = formatter.format(apply_schema(article), {'_id': '0'}, None)[0]
            sequence, output = first_result
            output = output.replace('\'\'', '\'')

            # When the formatter declares an output_field, return just that
            # field of the JSON output.
            if hasattr(formatter, 'output_field'):
                parsed = json.loads(output)
                output = parsed.get(formatter.output_field, '').replace('\'\'', '\'')
        except Exception as ex:
            logger.exception(ex)
            message = 'Error in formatting article: {}'.format(str(ex))
            raise SuperdeskApiError.badRequestError(message)

        return [{'formatted_doc': output}]
Esempio n. 4
0
def internal_destination_auto_publish(item, **kwargs):
    """Auto publish the item using internal destination.

    Either duplicates ``item`` and patches the copy through the
    ``archive_publish`` service, or patches a previously processed copy with
    the fields that differ, and then always raises ``StopDuplication``.

    :param dict item: item to be published
    :param kwargs: accepted but not used by this function
    :raises InvalidStateTransitionError: if the item state is not one of
        ``PUBLISH_STATES``
    :raises StopDuplication: on successful completion, to tell
        superdesk.internal_destination.handle_item_published not to duplicate
        the item, since duplication is handled here.
    """
    if item.get(ITEM_STATE) not in PUBLISH_STATES:
        raise InvalidStateTransitionError(
            message='Internal Destination auto publish macro can '
                    'only be called after publishing the item.')

    archive_action_service = get_resource_service(
        publish_services.get(item.get(ITEM_OPERATION)))
    archive_service = get_resource_service('archive')
    extra_fields = [PUBLISH_SCHEDULE, SCHEDULE_SETTINGS]

    # If a macro is doing the publishing we need the copy that was created
    # from this item earlier, on the same desk.
    req = ParsedRequest()
    lookup_conditions = [
        {PROCESSED_FROM: item.get(config.ID_FIELD)},
        {'task.desk': str(item.get('task').get('desk'))},
    ]
    req.where = json.dumps({'$and': lookup_conditions})
    req.max_results = 1
    matches = archive_service.get_from_mongo(req=req, lookup=None)
    overwrite_item = next(matches, None)

    # Keep publish_schedule and schedule_settings in the updates so that the
    # state can be set to scheduled.
    updates = {field: item[field] for field in extra_fields}

    needs_new_copy = item.get(ITEM_STATE) == CONTENT_STATE.PUBLISHED or not overwrite_item
    if needs_new_copy:
        new_id = archive_service.duplicate_content(item, state='routed', extra_fields=extra_fields)
        updates[ITEM_STATE] = item.get(ITEM_STATE)
        updates[PROCESSED_FROM] = item[config.ID_FIELD]

        get_resource_service('archive_publish').patch(id=new_id, updates=updates)
    else:
        # Reaching this branch implies overwrite_item is truthy.
        # Start from the schema fields of a copy of the item ...
        schema_item = apply_schema(deepcopy(item))
        # ... drop the keys that must not be carried over ...
        archive_service.remove_after_copy(
            schema_item,
            delete_keys=['source', 'unique_id', 'unique_name', 'original_id',
                         'expiry', 'correction_sequence'])
        # ... and keep only the non-underscore fields that differ.
        for key, val in schema_item.items():
            if overwrite_item.get(key) != val and not key.startswith("_"):
                updates[key] = val

        archive_action_service.patch(id=overwrite_item[config.ID_FIELD],
                                     updates=updates)

    # Signal superdesk.internal_destination.handle_item_published that the
    # item must not be duplicated again.
    raise StopDuplication()
Esempio n. 5
0
 def test_apply_schema_profile(self, mock):
     """``apply_schema`` on a 'test'-profile item is expected to drop ``slugline``.

     ``mock`` is not used in the body; presumably it is injected by a patch
     decorator outside this snippet.
     """
     item = dict(headline='foo', slugline='bar', guid='1', profile='test')
     expected = dict(headline='foo', guid='1', profile='test')
     self.assertEqual(expected, apply_schema(item))
Esempio n. 6
0
 def test_apply_schema_profile(self, mock):
     """Expect ``apply_schema`` to return the item minus its ``slugline``.

     ``mock`` is unused here; presumably supplied by a patch decorator that is
     not visible in this snippet.
     """
     item = {"headline": "foo", "slugline": "bar", "guid": "1", "profile": "test"}
     # Everything except the slugline should survive.
     expected = {key: value for key, value in item.items() if key != "slugline"}
     result = apply_schema(item)
     self.assertEqual(expected, result)
def format_document():
    """Format an archive document for a subscriber and return it as a response.

    Reads ``document_id``, ``subscriber_id`` and ``formatter`` from the
    request query arguments, formats the document with the matching
    formatter, and responds with the second element of the first formatted
    result. CORS and no-cache headers are attached; the allowed origin and
    headers come from ``app.config``.
    """
    query = request.args
    document_id = query.get("document_id")
    subscriber_id = query.get("subscriber_id")
    formatter_qcode = query.get("formatter")

    subscribers_service = get_resource_service("subscribers")
    subscriber = subscribers_service.find_one(req=None, _id=subscriber_id)
    archive_service = get_resource_service("archive")
    doc = archive_service.find_one(req=None, _id=document_id)

    formatter = get_formatter(formatter_qcode, doc)
    article = apply_schema(doc)
    formatted_docs = formatter.format(article=article, subscriber=subscriber, codes=None)

    allowed_headers = ",".join(app.config["X_HEADERS"])
    headers = {
        "Access-Control-Allow-Origin": app.config["CLIENT_URL"],
        "Access-Control-Allow-Methods": "GET",
        "Access-Control-Allow-Headers": allowed_headers,
        "Access-Control-Allow-Credentials": "true",
        "Cache-Control": "no-cache, no-store, must-revalidate",
    }

    body = formatted_docs[0][1]
    mimetype = get_mime_type(formatter_qcode)
    return Response(body, headers=headers, mimetype=mimetype)
def format_document():
    """Return an archive document formatted for a given subscriber.

    The ``document_id``, ``subscriber_id`` and ``formatter`` query arguments
    select the document, the subscriber and the formatter. The response body
    is the second element of the first formatted result.
    """
    query = request.args
    document_id = query.get('document_id')
    subscriber_id = query.get('subscriber_id')
    formatter_qcode = query.get('formatter')

    subscriber = get_resource_service('subscribers').find_one(req=None, _id=subscriber_id)
    doc = get_resource_service('archive').find_one(req=None, _id=document_id)

    formatter = get_formatter(formatter_qcode, doc)
    format_kwargs = {
        'article': apply_schema(doc),
        'subscriber': subscriber,
        'codes': None,
    }
    formatted_docs = formatter.format(**format_kwargs)

    # NOTE(review): the wildcard origin lets any site read this response -
    # confirm that is intended.
    headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET',
        'Cache-Control': 'no-cache, no-store, must-revalidate'
    }

    body = formatted_docs[0][1]
    return Response(body, headers=headers, mimetype=get_mime_type(formatter_qcode))
Esempio n. 9
0
    def create(self, docs, **kwargs):
        """Format an archive article with the formatter named in the request.

        The request document is ``docs[0]``; it provides ``formatter_name``
        and, optionally, ``article_id``. Error messages go through ``_()``.

        :param list docs: request documents; only the first one is read
        :param kwargs: accepted but not used by this method
        :return: ``[{"formatted_doc": <output>}]``, or ``None`` when the
            request document has no ``article_id``
        :raises: the error built by ``SuperdeskApiError.badRequestError`` for
            a missing formatter name, unknown formatter, unknown article, or
            any failure while validating/formatting
        """
        archive_service = get_resource_service("archive")
        request_doc = docs[0]

        requested_formatter = request_doc.get("formatter_name")
        if not requested_formatter:
            raise SuperdeskApiError.badRequestError(_("Formatter name not found"))

        formatter = self._get_formatter(requested_formatter)
        if not formatter:
            raise SuperdeskApiError.badRequestError(_("Formatter not found"))

        if "article_id" not in request_doc:
            return None

        article = archive_service.find_one(req=None, _id=request_doc.get("article_id"))
        if not article:
            raise SuperdeskApiError.badRequestError(_("Article not found!"))

        try:
            self._validate(article)
            results = formatter.format(apply_schema(article), {"_id": "0"}, None)
            sequence, output = results[0]
            output = output.replace("''", "'")

            # Narrow the response to output_field when the formatter has one.
            if hasattr(formatter, "output_field"):
                as_json = json.loads(output)
                field_value = as_json.get(formatter.output_field, "")
                output = field_value.replace("''", "'")
        except Exception as ex:
            logger.exception(ex)
            template = _("Error in formatting article: {exception}")
            raise SuperdeskApiError.badRequestError(template.format(exception=str(ex)))

        return [{"formatted_doc": output}]
def format_document():
    """Serve one archive document rendered by the requested formatter.

    Query arguments: ``document_id`` (archive item), ``subscriber_id``
    (subscriber passed to the formatter) and ``formatter`` (formatter qcode,
    also used to pick the response mimetype). The body is the second element
    of the first result returned by the formatter.
    """
    document_id = request.args.get('document_id')
    subscriber_id = request.args.get('subscriber_id')
    formatter_qcode = request.args.get('formatter')

    subscribers = get_resource_service('subscribers')
    subscriber = subscribers.find_one(req=None, _id=subscriber_id)
    archive = get_resource_service('archive')
    doc = archive.find_one(req=None, _id=document_id)

    formatter = get_formatter(formatter_qcode, doc)
    article = apply_schema(doc)
    formatted_docs = formatter.format(article=article, subscriber=subscriber, codes=None)

    # NOTE(review): any origin is allowed to read this response - confirm
    # that is intended.
    headers = {}
    headers['Access-Control-Allow-Origin'] = '*'
    headers['Access-Control-Allow-Methods'] = 'GET'
    headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'

    _first_seq, first_body = formatted_docs[0][0], formatted_docs[0][1]
    return Response(first_body, headers=headers, mimetype=get_mime_type(formatter_qcode))
Esempio n. 11
0
 def test_apply_schema_default(self):
     """An item with only ``guid`` and ``headline`` should equal its ``apply_schema`` result."""
     item = dict(guid='guid', headline='foo')
     result = apply_schema(item)
     self.assertEqual(item, result)
Esempio n. 12
0
    def queue_transmission(self, doc, subscribers, subscriber_codes=None, associations=None):
        """Method formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
        based on the format_types configured across for all the subscribers as the formatted item must have a published
        sequence number generated by Subscriber.

        A failure while handling one subscriber is logged and does not stop
        the remaining subscribers from being processed.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: optional mapping looked up by subscriber id; the value found is passed to
            the formatter and stored as ``codes`` on each queue item. Defaults to an empty mapping.
        :param dict associations: optional mapping looked up by subscriber id; the value found (default ``[]``) is
            stored as ``associated_items`` on each queue item. Defaults to an empty mapping.
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        # ``None`` replaces the former mutable ``{}`` defaults.
        subscriber_codes = {} if subscriber_codes is None else subscriber_codes
        associations = {} if associations is None else associations

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in self.get_destinations(subscriber):
                    packaged = (destination.get('config') or {}).get('packaged', False)

                    # NOTE(review): ``doc`` is rebound here, so later destinations and
                    # subscribers see the embedded version - confirm this is intended.
                    if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and packaged:
                        doc = self._embed_package_items(doc)

                    if doc.get(PUBLISHED_IN_PACKAGE) and packaged:
                        continue

                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    subscriber_id = subscriber[config.ID_FIELD]
                    codes = subscriber_codes.get(subscriber_id)
                    formatted_docs = formatter.format(apply_schema(doc), subscriber, codes)

                    # Normalise every result to a dict carrying the two expected keys.
                    for idx, publish_data in enumerate(formatted_docs):
                        if not isinstance(publish_data, dict):
                            pub_seq_num, formatted_doc = publish_data
                            formatted_docs[idx] = {'published_seq_num': pub_seq_num,
                                                   'formatted_item': formatted_doc}
                        elif 'published_seq_num' not in publish_data or 'formatted_item' not in publish_data:
                            # Explicit raise: an ``assert`` would vanish under ``python -O``.
                            raise ValueError("missing keys in publish_data")

                    for publish_queue_item in formatted_docs:
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['subscriber_id'] = subscriber_id
                        publish_queue_item['codes'] = codes
                        publish_queue_item['destination'] = destination
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        publish_queue_item['ingest_provider'] = \
                            ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                        publish_queue_item['associated_items'] = associations.get(subscriber_id, [])

                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                        # Binary payloads go to storage; only the storage id stays on the queue item.
                        if 'encoded_item' in publish_queue_item:
                            binary = io.BytesIO(publish_queue_item.pop('encoded_item'))
                            publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                        publish_queue_item.pop(ITEM_STATE, None)

                        # content api delivery will be marked as SUCCESS in queue
                        get_resource_service('publish_queue').post([publish_queue_item])

                        queued = True
            except Exception:
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued
 def test_apply_schema_profile(self, mock):
     """For a 'test'-profile item, ``apply_schema`` is expected to omit ``slugline``.

     ``mock`` is unused in the body; presumably it comes from a patch decorator
     that is not part of this snippet.
     """
     item = {
         'headline': 'foo',
         'slugline': 'bar',
         'guid': '1',
         'profile': 'test',
     }
     expected = {
         'headline': 'foo',
         'guid': '1',
         'profile': 'test',
     }
     self.assertEqual(expected, apply_schema(item))
 def test_apply_schema_default(self):
     """``apply_schema`` should hand back an equal item when only guid/headline are set."""
     item = {
         'guid': 'guid',
         'headline': 'foo',
     }
     schema_applied = apply_schema(item)
     self.assertEqual(item, schema_applied)
Esempio n. 15
0
    def queue_transmission(self,
                           doc,
                           subscribers,
                           subscriber_codes=None,
                           associations=None):
        """Method formats and then queues the article for transmission to the passed subscribers.

        ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
        based on the format_types configured across for all the subscribers as the formatted item must have a published
        sequence number generated by Subscriber.

        A failure while handling one subscriber is logged and does not stop
        the remaining subscribers from being processed. Only ``Exception``
        subclasses are handled that way; ``KeyboardInterrupt`` and
        ``SystemExit`` propagate to the caller.

        :param dict doc: document to queue for transmission
        :param list subscribers: List of subscriber dict.
        :param dict subscriber_codes: optional mapping looked up by subscriber id; the value found is passed to
            the formatter and stored as ``codes`` on each queue item. Defaults to an empty mapping.
        :param dict associations: optional mapping looked up by subscriber id; the value found (default ``[]``) is
            stored as ``associated_items`` on each queue item. Defaults to an empty mapping.
        :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
        """
        # ``None`` replaces the former mutable ``{}`` defaults.
        subscriber_codes = {} if subscriber_codes is None else subscriber_codes
        associations = {} if associations is None else associations

        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in self.get_destinations(subscriber):
                    packaged = (destination.get('config') or {}).get('packaged', False)

                    # NOTE(review): ``doc`` is rebound here, so later destinations and
                    # subscribers see the embedded version - confirm this is intended.
                    if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                            PACKAGE_TYPE not in doc and packaged:
                        doc = self._embed_package_items(doc)

                    if doc.get(PUBLISHED_IN_PACKAGE) and packaged and \
                            app.config.get('NO_TAKES'):
                        continue

                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    subscriber_id = subscriber[config.ID_FIELD]
                    codes = subscriber_codes.get(subscriber_id)
                    formatted_docs = formatter.format(apply_schema(doc), subscriber, codes)

                    # Normalise every result to a dict carrying the two expected keys.
                    for idx, publish_data in enumerate(formatted_docs):
                        if not isinstance(publish_data, dict):
                            pub_seq_num, formatted_doc = publish_data
                            formatted_docs[idx] = {
                                'published_seq_num': pub_seq_num,
                                'formatted_item': formatted_doc
                            }
                        elif 'published_seq_num' not in publish_data or 'formatted_item' not in publish_data:
                            # Explicit raise: an ``assert`` would vanish under ``python -O``.
                            raise ValueError("missing keys in publish_data")

                    for publish_queue_item in formatted_docs:
                        publish_queue_item['item_id'] = doc['item_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['subscriber_id'] = subscriber_id
                        publish_queue_item['codes'] = codes
                        publish_queue_item['destination'] = destination
                        # publish_schedule is just to indicate in the queue item is create via scheduled item
                        publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)
                        publish_queue_item['publishing_action'] = self.published_state
                        publish_queue_item['ingest_provider'] = \
                            ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                        publish_queue_item['associated_items'] = associations.get(subscriber_id, [])

                        if doc.get(PUBLISHED_IN_PACKAGE):
                            publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]

                        # Binary payloads go to storage; only the storage id stays on the queue item.
                        if 'encoded_item' in publish_queue_item:
                            binary = io.BytesIO(publish_queue_item.pop('encoded_item'))
                            publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                        publish_queue_item.pop(ITEM_STATE, None)
                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except Exception:
                # ``except Exception`` rather than a bare ``except`` so that
                # interpreter-exit signals are not logged and swallowed.
                logger.exception(
                    "Failed to queue item for id {} with headline {} for subscriber {}."
                    .format(doc.get(config.ID_FIELD), doc.get('headline'),
                            subscriber.get('name')))

        return no_formatters, queued
Esempio n. 16
0
 def test_apply_schema_default(self):
     """Without a profile, the item is expected to equal its ``apply_schema`` result."""
     item = dict(guid="guid", headline="foo")
     outcome = apply_schema(item)
     self.assertEqual(item, outcome)