def filter_document(doc):
    """
    Filter document:

    1. Remove fields that should not be there given its profile.
    2. Remove `None` valued renditions (any falsy rendition value is dropped).

    An ``associations`` value, a single association, or a ``renditions``
    value that is missing or ``None`` is skipped instead of raising.

    :param dict doc: document to filter
    :return: dict filtered document
    """
    # remove fields that should not be there given its profile.
    doc = apply_schema(doc)

    # remove `None` valued renditions.
    # `or {}` (rather than a `.get` default) also covers a key that is present
    # but explicitly set to None.
    associations = doc.get(ASSOCIATIONS) or {}
    for association in associations.values():
        if not association:
            continue

        renditions = association.get("renditions") or {}
        # collect the keys first: a dict must not change size while iterated
        empty_rendition_keys = [key for key, value in renditions.items() if not value]
        for rendition_key in empty_rendition_keys:
            del renditions[rendition_key]

    return doc
def create(self, docs, **kwargs):
    """Format the archive article referenced by the first posted document.

    Only ``docs[0]`` is read. It must carry ``formatter_name`` and may carry
    ``article_id``.

    :param list docs: posted documents
    :param kwargs: accepted but not used here
    :return: one-element list ``[{'formatted_doc': ...}]``
    :raises SuperdeskApiError: bad request error when the formatter name is
        missing, the formatter or the article cannot be found, or validation /
        formatting raises
    """
    archive_service = get_resource_service('archive')
    request_doc = docs[0]

    formatter_name = request_doc.get('formatter_name')
    if not formatter_name:
        raise SuperdeskApiError.badRequestError('Formatter name not found')

    formatter = self._get_formatter(formatter_name)
    if not formatter:
        raise SuperdeskApiError.badRequestError('Formatter not found')

    # NOTE(review): the original layout of this block was flattened; this
    # assumes the final `return` belonged to the `article_id` branch, so a
    # request without `article_id` yields None - confirm.
    if 'article_id' not in request_doc:
        return None

    article = archive_service.find_one(req=None, _id=request_doc.get('article_id'))
    if not article:
        raise SuperdeskApiError.badRequestError('Article not found!')

    try:
        self._validate(article)
        formatter_results = formatter.format(apply_schema(article), {'_id': '0'}, None)
        # only the first (sequence, document) pair is used
        _sequence, output = formatter_results[0]
        output = output.replace('\'\'', '\'')

        # respond only with the formatted output if output_field is configured
        if hasattr(formatter, 'output_field'):
            parsed_output = json.loads(output)
            output = parsed_output.get(formatter.output_field, '').replace('\'\'', '\'')
    except Exception as ex:
        logger.exception(ex)
        error_message = 'Error in formatting article: {}'.format(str(ex))
        raise SuperdeskApiError.badRequestError(error_message)

    return [{'formatted_doc': output}]
def create(self, docs, **kwargs):
    """Format the archive article referenced by the first posted document.

    Only ``docs[0]`` is read. It must carry ``formatter_name`` and may carry
    ``article_id``.

    :param list docs: posted documents
    :param kwargs: accepted but not used here
    :return: one-element list ``[{'formatted_doc': ...}]``
    :raises SuperdeskApiError: bad request error when the formatter name is
        missing, the formatter or the article cannot be found, or validation /
        formatting raises
    """
    archive_service = get_resource_service('archive')
    request_doc = docs[0]

    formatter_name = request_doc.get('formatter_name')
    if not formatter_name:
        raise SuperdeskApiError.badRequestError('Formatter name not found')

    formatter = self._get_formatter(formatter_name)
    if not formatter:
        raise SuperdeskApiError.badRequestError('Formatter not found')

    # NOTE(review): the original layout of this block was flattened; this
    # assumes the final `return` belonged to the `article_id` branch, so a
    # request without `article_id` yields None - confirm.
    if 'article_id' not in request_doc:
        return None

    article = archive_service.find_one(req=None, _id=request_doc.get('article_id'))
    if not article:
        raise SuperdeskApiError.badRequestError('Article not found!')

    try:
        self._validate(article)
        formatter_results = formatter.format(apply_schema(article), {'_id': '0'}, None)
        # only the first (sequence, document) pair is used
        _sequence, output = formatter_results[0]
        output = output.replace('\'\'', '\'')

        # respond only with the formatted output if output_field is configured
        if hasattr(formatter, 'output_field'):
            parsed_output = json.loads(output)
            output = parsed_output.get(formatter.output_field, '').replace('\'\'', '\'')
    except Exception as ex:
        logger.exception(ex)
        error_message = 'Error in formatting article: {}'.format(str(ex))
        raise SuperdeskApiError.badRequestError(error_message)

    return [{'formatted_doc': output}]
def internal_destination_auto_publish(item, **kwargs):
    """Auto publish the item using internal destination.

    Either a new duplicate of ``item`` is created and patched through the
    ``archive_publish`` service, or the archive item previously processed from
    ``item`` on the same desk is patched with the fields that differ.

    :param dict item: item to be published
    :param kwargs: accepted but not used here
    :raises InvalidStateTransitionError: when the item state is not in ``PUBLISH_STATES``
    :raises StopDuplication: raised after the patch completes, to tell
        superdesk.internal_destination.handle_item_published to stop duplication,
        as duplication is handled by this method.
    """
    if item.get(ITEM_STATE) not in PUBLISH_STATES:
        raise InvalidStateTransitionError(message='Internal Destination auto publish macro can '
                                                  'only be called after publishing the item.')

    publish_operation = item.get(ITEM_OPERATION)
    publish_action_service = get_resource_service(publish_services.get(publish_operation))
    archive = get_resource_service('archive')

    # if any macro is doing publishing then we need the duplicate item that was published earlier
    lookup_request = ParsedRequest()
    same_origin_and_desk = [
        {PROCESSED_FROM: item.get(config.ID_FIELD)},
        {'task.desk': str(item.get('task').get('desk'))},
    ]
    lookup_request.where = json.dumps({'$and': same_origin_and_desk})
    lookup_request.max_results = 1
    existing_copy = next((archive.get_from_mongo(req=lookup_request, lookup=None)), None)

    # keep publish_schedule and schedule_settings in updates so that state can be set to scheduled
    updates = {}
    updates[PUBLISH_SCHEDULE] = item[PUBLISH_SCHEDULE]
    updates[SCHEDULE_SETTINGS] = item[SCHEDULE_SETTINGS]

    if item.get(ITEM_STATE) == CONTENT_STATE.PUBLISHED or not existing_copy:
        # first publish (or nothing to overwrite): route a fresh duplicate
        duplicate_id = archive.duplicate_content(
            item, state='routed', extra_fields=[PUBLISH_SCHEDULE, SCHEDULE_SETTINGS])
        updates[ITEM_STATE] = item.get(ITEM_STATE)
        updates[PROCESSED_FROM] = item[config.ID_FIELD]
        get_resource_service('archive_publish').patch(id=duplicate_id, updates=updates)
    else:
        # existing_copy is necessarily truthy in this branch
        # get the schema fields
        schema_item = apply_schema(deepcopy(item))

        # remove the keys
        archive.remove_after_copy(
            schema_item,
            delete_keys=['source', 'unique_id', 'unique_name', 'original_id', 'expiry', 'correction_sequence'])

        # get the diff
        for key, val in schema_item.items():
            if existing_copy.get(key) != val and not key.startswith("_"):
                updates[key] = val

        publish_action_service.patch(id=existing_copy[config.ID_FIELD], updates=updates)

    # raise stop duplication on successful completion so that
    # internal destination superdesk.internal_destination.handle_item_published
    # will not duplicate the item.
    raise StopDuplication()
def test_apply_schema_profile(self, mock):
    """apply_schema is expected to drop ``slugline`` for an item with profile ``test``.

    NOTE(review): ``mock`` is presumably injected by a patch decorator outside
    this view that defines what the ``test`` profile allows - confirm.
    """
    source_item = {'headline': 'foo', 'slugline': 'bar', 'guid': '1', 'profile': 'test'}
    expected = {'headline': 'foo', 'guid': '1', 'profile': 'test'}

    filtered = apply_schema(source_item)

    self.assertEqual(expected, filtered)
def test_apply_schema_profile(self, mock):
    """apply_schema is expected to drop ``slugline`` for an item with profile ``test``.

    NOTE(review): ``mock`` is presumably injected by a patch decorator outside
    this view that defines what the ``test`` profile allows - confirm.
    """
    source_item = {"headline": "foo", "slugline": "bar", "guid": "1", "profile": "test"}
    expected = {"headline": "foo", "guid": "1", "profile": "test"}

    filtered = apply_schema(source_item)

    self.assertEqual(expected, filtered)
def format_document():
    """Format one archive document for one subscriber and return it as a response.

    Reads ``document_id``, ``subscriber_id`` and ``formatter`` from the request
    query arguments. No check is made here for a missing subscriber, document
    or formatter.

    :return: ``Response`` whose body is the second element of the first entry
        produced by the formatter, with CORS and no-cache headers set
    """
    document_id, subscriber_id, formatter_qcode = (
        request.args.get(arg_name) for arg_name in ("document_id", "subscriber_id", "formatter")
    )

    subscribers_service = get_resource_service("subscribers")
    subscriber = subscribers_service.find_one(req=None, _id=subscriber_id)
    archive_service = get_resource_service("archive")
    doc = archive_service.find_one(req=None, _id=document_id)

    formatter = get_formatter(formatter_qcode, doc)
    formatted_docs = formatter.format(article=apply_schema(doc), subscriber=subscriber, codes=None)

    headers = {
        "Access-Control-Allow-Origin": app.config["CLIENT_URL"],
        "Access-Control-Allow-Methods": "GET",
        "Access-Control-Allow-Headers": ",".join(app.config["X_HEADERS"]),
        "Access-Control-Allow-Credentials": "true",
        "Cache-Control": "no-cache, no-store, must-revalidate",
    }

    # each formatter entry is indexed as [1] for the formatted payload
    payload = formatted_docs[0][1]
    mime_type = get_mime_type(formatter_qcode)
    return Response(payload, headers=headers, mimetype=mime_type)
def format_document():
    """Format one archive document for one subscriber and return it as a response.

    Reads ``document_id``, ``subscriber_id`` and ``formatter`` from the request
    query arguments. No check is made here for a missing subscriber, document
    or formatter.

    :return: ``Response`` whose body is the second element of the first entry
        produced by the formatter, with CORS and no-cache headers set
    """
    document_id, subscriber_id, formatter_qcode = (
        request.args.get(arg_name) for arg_name in ('document_id', 'subscriber_id', 'formatter')
    )

    subscribers_service = get_resource_service('subscribers')
    subscriber = subscribers_service.find_one(req=None, _id=subscriber_id)
    archive_service = get_resource_service('archive')
    doc = archive_service.find_one(req=None, _id=document_id)

    formatter = get_formatter(formatter_qcode, doc)
    formatted_docs = formatter.format(article=apply_schema(doc), subscriber=subscriber, codes=None)

    headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET',
        'Cache-Control': 'no-cache, no-store, must-revalidate',
    }

    # each formatter entry is indexed as [1] for the formatted payload
    payload = formatted_docs[0][1]
    mime_type = get_mime_type(formatter_qcode)
    return Response(payload, headers=headers, mimetype=mime_type)
def create(self, docs, **kwargs):
    """Format the archive article referenced by the first posted document.

    Only ``docs[0]`` is read. It must carry ``formatter_name`` and may carry
    ``article_id``. Error messages are passed through ``_``.

    :param list docs: posted documents
    :param kwargs: accepted but not used here
    :return: one-element list ``[{"formatted_doc": ...}]``
    :raises SuperdeskApiError: bad request error when the formatter name is
        missing, the formatter or the article cannot be found, or validation /
        formatting raises
    """
    archive_service = get_resource_service("archive")
    request_doc = docs[0]

    formatter_name = request_doc.get("formatter_name")
    if not formatter_name:
        raise SuperdeskApiError.badRequestError(_("Formatter name not found"))

    formatter = self._get_formatter(formatter_name)
    if not formatter:
        raise SuperdeskApiError.badRequestError(_("Formatter not found"))

    # NOTE(review): the original layout of this block was flattened; this
    # assumes the final `return` belonged to the `article_id` branch, so a
    # request without `article_id` yields None - confirm.
    if "article_id" not in request_doc:
        return None

    article = archive_service.find_one(req=None, _id=request_doc.get("article_id"))
    if not article:
        raise SuperdeskApiError.badRequestError(_("Article not found!"))

    try:
        self._validate(article)
        formatter_results = formatter.format(apply_schema(article), {"_id": "0"}, None)
        # only the first (sequence, document) pair is used
        _sequence, output = formatter_results[0]
        output = output.replace("''", "'")

        # respond only with the formatted output if output_field is configured
        if hasattr(formatter, "output_field"):
            parsed_output = json.loads(output)
            output = parsed_output.get(formatter.output_field, "").replace("''", "'")
    except Exception as ex:
        logger.exception(ex)
        error_template = _("Error in formatting article: {exception}")
        raise SuperdeskApiError.badRequestError(error_template.format(exception=str(ex)))

    return [{"formatted_doc": output}]
def format_document():
    """Format one archive document for one subscriber and return it as a response.

    Reads ``document_id``, ``subscriber_id`` and ``formatter`` from the request
    query arguments. No check is made here for a missing subscriber, document
    or formatter.

    :return: ``Response`` whose body is the second element of the first entry
        produced by the formatter, with CORS and no-cache headers set
    """
    document_id, subscriber_id, formatter_qcode = (
        request.args.get(arg_name) for arg_name in ('document_id', 'subscriber_id', 'formatter')
    )

    subscribers_service = get_resource_service('subscribers')
    subscriber = subscribers_service.find_one(req=None, _id=subscriber_id)
    archive_service = get_resource_service('archive')
    doc = archive_service.find_one(req=None, _id=document_id)

    formatter = get_formatter(formatter_qcode, doc)
    formatted_docs = formatter.format(article=apply_schema(doc), subscriber=subscriber, codes=None)

    headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET',
        'Cache-Control': 'no-cache, no-store, must-revalidate',
    }

    # each formatter entry is indexed as [1] for the formatted payload
    payload = formatted_docs[0][1]
    mime_type = get_mime_type(formatter_qcode)
    return Response(payload, headers=headers, mimetype=mime_type)
def test_apply_schema_default(self):
    """An item without a ``profile`` key is expected to compare equal to apply_schema's result."""
    source_item = {'guid': 'guid', 'headline': 'foo'}

    filtered = apply_schema(source_item)

    self.assertEqual(source_item, filtered)
def queue_transmission(self, doc, subscribers, subscriber_codes=None, associations=None):
    """Method formats and then queues the article for transmission to the passed subscribers.

    ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
    based on the format_types configured across for all the subscribers as the formatted item must have a published
    sequence number generated by Subscriber.

    An exception raised while handling one subscriber is logged and the remaining subscribers are
    still processed.

    :param dict doc: document to queue for transmission
    :param list subscribers: List of subscriber dict.
    :param dict subscriber_codes: optional mapping looked up by subscriber id; the value is passed to the
        formatter and stored as ``codes`` on each queue item. ``None`` is treated as empty.
    :param dict associations: optional mapping looked up by subscriber id; the value is stored as
        ``associated_items`` on each queue item. ``None`` is treated as empty.
    :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
    """
    # None instead of `{}` defaults: a mutable default is shared between calls
    if subscriber_codes is None:
        subscriber_codes = {}
    if associations is None:
        associations = {}

    queued = False
    no_formatters = []
    for subscriber in subscribers:
        try:
            if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                    subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                # wire subscribers can get only text and preformatted stories
                continue

            for destination in self.get_destinations(subscriber):
                embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                    (destination.get('config') or {}).get('packaged', False)
                if embed_package_items:
                    # NOTE: rebinding `doc` here also affects later destinations and subscribers
                    doc = self._embed_package_items(doc)

                if doc.get(PUBLISHED_IN_PACKAGE) and \
                        (destination.get('config') or {}).get('packaged', False):
                    continue

                # Step 2(a)
                formatter = get_formatter(destination['format'], doc)

                if not formatter:  # if formatter not found then record it
                    no_formatters.append(destination['format'])
                    continue

                formatted_docs = formatter.format(apply_schema(doc),
                                                  subscriber,
                                                  subscriber_codes.get(subscriber[config.ID_FIELD]))

                # normalise (sequence number, document) pairs into queue item dicts
                for idx, publish_data in enumerate(formatted_docs):
                    if not isinstance(publish_data, dict):
                        pub_seq_num, formatted_doc = publish_data
                        formatted_docs[idx] = {'published_seq_num': pub_seq_num,
                                               'formatted_item': formatted_doc}
                    else:
                        assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data,\
                            "missing keys in publish_data"

                for publish_queue_item in formatted_docs:
                    publish_queue_item['item_id'] = doc['item_id']
                    publish_queue_item['item_version'] = doc[config.VERSION]
                    publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                    publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                    publish_queue_item['destination'] = destination
                    # publish_schedule is just to indicate in the queue item is create via scheduled item
                    publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                    publish_queue_item['unique_name'] = doc.get('unique_name', None)
                    publish_queue_item['content_type'] = doc.get('type', None)
                    publish_queue_item['headline'] = doc.get('headline', None)
                    publish_queue_item['publishing_action'] = self.published_state
                    publish_queue_item['ingest_provider'] = \
                        ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                    publish_queue_item['associated_items'] = associations.get(subscriber[config.ID_FIELD], [])
                    if doc.get(PUBLISHED_IN_PACKAGE):
                        publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]
                    try:
                        encoded_item = publish_queue_item.pop('encoded_item')
                    except KeyError:
                        pass
                    else:
                        # store the encoded payload and keep only its storage id on the queue item
                        binary = io.BytesIO(encoded_item)
                        publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                    publish_queue_item.pop(ITEM_STATE, None)

                    # content api delivery will be marked as SUCCESS in queue
                    get_resource_service('publish_queue').post([publish_queue_item])
                    queued = True
        except Exception:
            logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                             .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

    return no_formatters, queued
def test_apply_schema_profile(self, mock):
    """apply_schema is expected to drop ``slugline`` for an item with profile ``test``.

    NOTE(review): ``mock`` is presumably injected by a patch decorator outside
    this view that defines what the ``test`` profile allows - confirm.
    """
    source_item = {
        'headline': 'foo',
        'slugline': 'bar',
        'guid': '1',
        'profile': 'test',
    }
    expected = {
        'headline': 'foo',
        'guid': '1',
        'profile': 'test',
    }

    filtered = apply_schema(source_item)

    self.assertEqual(expected, filtered)
def test_apply_schema_default(self):
    """An item without a ``profile`` key is expected to compare equal to apply_schema's result."""
    source_item = {
        'guid': 'guid',
        'headline': 'foo',
    }

    filtered = apply_schema(source_item)

    self.assertEqual(source_item, filtered)
def queue_transmission(self, doc, subscribers, subscriber_codes=None, associations=None):
    """Method formats and then queues the article for transmission to the passed subscribers.

    ::Important Note:: Format Type across Subscribers can repeat. But we can't have formatted item generated once
    based on the format_types configured across for all the subscribers as the formatted item must have a published
    sequence number generated by Subscriber.

    An exception raised while handling one subscriber is logged and the remaining subscribers are
    still processed. Only ``Exception`` subclasses are handled that way; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate.

    :param dict doc: document to queue for transmission
    :param list subscribers: List of subscriber dict.
    :param dict subscriber_codes: optional mapping looked up by subscriber id; the value is passed to the
        formatter and stored as ``codes`` on each queue item. ``None`` is treated as empty.
    :param dict associations: optional mapping looked up by subscriber id; the value is stored as
        ``associated_items`` on each queue item. ``None`` is treated as empty.
    :return : (list, bool) tuple of list of missing formatters and boolean flag. True if queued else False
    """
    # None instead of `{}` defaults: a mutable default is shared between calls
    if subscriber_codes is None:
        subscriber_codes = {}
    if associations is None:
        associations = {}

    queued = False
    no_formatters = []
    for subscriber in subscribers:
        try:
            if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                    subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                # wire subscribers can get only text and preformatted stories
                continue

            for destination in self.get_destinations(subscriber):
                embed_package_items = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                    PACKAGE_TYPE not in doc and (destination.get('config') or {}).get('packaged', False)
                if embed_package_items:
                    # NOTE: rebinding `doc` here also affects later destinations and subscribers
                    doc = self._embed_package_items(doc)

                if doc.get(PUBLISHED_IN_PACKAGE) and \
                        (destination.get('config') or {}).get('packaged', False) and \
                        app.config.get('NO_TAKES'):
                    continue

                # Step 2(a)
                formatter = get_formatter(destination['format'], doc)

                if not formatter:  # if formatter not found then record it
                    no_formatters.append(destination['format'])
                    continue

                formatted_docs = formatter.format(
                    apply_schema(doc), subscriber,
                    subscriber_codes.get(subscriber[config.ID_FIELD]))

                # normalise (sequence number, document) pairs into queue item dicts
                for idx, publish_data in enumerate(formatted_docs):
                    if not isinstance(publish_data, dict):
                        pub_seq_num, formatted_doc = publish_data
                        formatted_docs[idx] = {
                            'published_seq_num': pub_seq_num,
                            'formatted_item': formatted_doc
                        }
                    else:
                        assert 'published_seq_num' in publish_data and 'formatted_item' in publish_data,\
                            "missing keys in publish_data"

                for publish_queue_item in formatted_docs:
                    publish_queue_item['item_id'] = doc['item_id']
                    publish_queue_item['item_version'] = doc[config.VERSION]
                    publish_queue_item['subscriber_id'] = subscriber[config.ID_FIELD]
                    publish_queue_item['codes'] = subscriber_codes.get(subscriber[config.ID_FIELD])
                    publish_queue_item['destination'] = destination
                    # publish_schedule is just to indicate in the queue item is create via scheduled item
                    publish_queue_item[PUBLISH_SCHEDULE] = get_utc_schedule(doc, PUBLISH_SCHEDULE) or None
                    publish_queue_item['unique_name'] = doc.get('unique_name', None)
                    publish_queue_item['content_type'] = doc.get('type', None)
                    publish_queue_item['headline'] = doc.get('headline', None)
                    publish_queue_item['publishing_action'] = self.published_state
                    publish_queue_item['ingest_provider'] = \
                        ObjectId(doc.get('ingest_provider')) if doc.get('ingest_provider') else None
                    publish_queue_item['associated_items'] = associations.get(subscriber[config.ID_FIELD], [])
                    if doc.get(PUBLISHED_IN_PACKAGE):
                        publish_queue_item[PUBLISHED_IN_PACKAGE] = doc[PUBLISHED_IN_PACKAGE]
                    try:
                        encoded_item = publish_queue_item.pop('encoded_item')
                    except KeyError:
                        pass
                    else:
                        # store the encoded payload and keep only its storage id on the queue item
                        binary = io.BytesIO(encoded_item)
                        publish_queue_item['encoded_item_id'] = app.storage.put(binary)
                    publish_queue_item.pop(ITEM_STATE, None)
                    get_resource_service('publish_queue').post([publish_queue_item])
                    queued = True
        except Exception:
            # was a bare `except:`, which also swallowed KeyboardInterrupt / SystemExit
            logger.exception(
                "Failed to queue item for id {} with headline {} for subscriber {}."
                .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

    return no_formatters, queued
def test_apply_schema_default(self):
    """An item without a ``profile`` key is expected to compare equal to apply_schema's result."""
    source_item = {"guid": "guid", "headline": "foo"}

    filtered = apply_schema(source_item)

    self.assertEqual(source_item, filtered)