def run(self, ad_username, ad_password, username, admin='false'):
    """
    Imports or Updates a User Profile from AD to Mongo.

    :param ad_username: Active Directory Username
    :param ad_password: Password of Active Directory Username
    :param username: Username as in Active Directory whose profile needs to be imported to Superdesk.
    :return: User Profile.
    """
    # force type conversion to boolean
    user_type = 'administrator' if admin is not None and admin.lower() == 'true' else 'user'

    # Authenticate and fetch profile from AD
    settings = app.settings
    ad_auth = ADAuth(settings['LDAP_SERVER'], settings['LDAP_SERVER_PORT'], settings['LDAP_BASE_FILTER'],
                     settings['LDAP_USER_FILTER'], settings['LDAP_USER_ATTRIBUTES'], settings['LDAP_FQDN'])

    user_data = ad_auth.authenticate_and_fetch_profile(ad_username, ad_password, username)

    if len(user_data) == 0:
        raise SuperdeskApiError.notFoundError('Username not found')

    # Check if User Profile already exists in Mongo
    user = superdesk.get_resource_service('users').find_one(username=username, req=None)

    if user:
        superdesk.get_resource_service('users').patch(user.get('_id'), user_data)
    else:
        add_default_values(user_data, username, user_type=user_type)
        superdesk.get_resource_service('users').post([user_data])

    return user_data
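# A minimal usage sketch for the command above, assuming it is registered as a superdesk
# manager command; the command name and class name below are assumptions for illustration
# only, not taken from the snippet itself:
#
#   python manage.py users:copyfromad --ad_username=admin --ad_password=secret \
#       --username=jdoe --admin=true
#
# which would end up calling something equivalent to:
#
#   profile = ImportUserProfileFromADCommand().run('admin', 'secret', 'jdoe', admin='true')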
def _get_field_values(self):
    values = {}
    vocabularies_resource = get_resource_service('vocabularies')
    values['anpa_category'] = vocabularies_resource.find_one(req=None, _id='categories')['items']

    req = ParsedRequest()
    req.where = json.dumps({'$or': [{"schema_field": "genre"}, {"_id": "genre"}]})
    genre = vocabularies_resource.get(req=req, lookup=None)
    if genre.count():
        values['genre'] = genre[0]['items']

    values['urgency'] = vocabularies_resource.find_one(req=None, _id='urgency')['items']
    values['priority'] = vocabularies_resource.find_one(req=None, _id='priority')['items']
    values['type'] = vocabularies_resource.find_one(req=None, _id='type')['items']

    subject = vocabularies_resource.find_one(req=None, schema_field='subject')
    if subject:
        values['subject'] = subject['items']
    else:
        values['subject'] = get_subjectcodeitems()

    values['desk'] = list(get_resource_service('desks').get(None, {}))
    values['stage'] = self._get_stage_field_values(values['desk'])
    values['sms'] = [{'qcode': 0, 'name': 'False'}, {'qcode': 1, 'name': 'True'}]
    values['embargo'] = [{'qcode': 0, 'name': 'False'}, {'qcode': 1, 'name': 'True'}]

    req = ParsedRequest()
    req.where = json.dumps({'$or': [{"schema_field": "place"}, {"_id": "place"}, {"_id": "locators"}]})
    place = vocabularies_resource.get(req=req, lookup=None)
    if place.count():
        values['place'] = place[0]['items']

    values['ingest_provider'] = list(get_resource_service('ingest_providers').get(None, {}))
    return values
def test_ingest_cancellation(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2016_newsml_L1N14N0FF:978556838'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self._get_provider_service(provider)
    provider_service.provider = provider
    provider_service.URL = provider.get('config', {}).get('url')
    items = provider_service.fetch_ingest(guid)

    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    self.ingest_items(items, provider, provider_service)

    guid = 'tag_reuters.com_2016_newsml_L1N14N0FF:1542761538'
    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    self.ingest_items(items, provider, provider_service)

    ingest_service = get_resource_service('ingest')
    lookup = {'uri': items[0].get('uri')}
    family_members = ingest_service.get_from_mongo(req=None, lookup=lookup)
    self.assertEqual(family_members.count(), 2)

    for relative in family_members:
        self.assertEqual(relative['pubstatus'], 'canceled')
        self.assertEqual(relative['state'], 'killed')
def _duplicate_versions(self, old_id, new_doc):
    """
    Duplicates the version history of the article identified by old_id.

    Each version's identifiers are changed to the identifiers of new_doc.

    :param old_id: identifier to fetch version history
    :param new_doc: identifiers from this doc will be used to create version history for the duplicated item.
    """
    resource_def = app.config['DOMAIN']['archive']
    version_id = versioned_id_field(resource_def)
    old_versions = get_resource_service('archive_versions').get(req=None, lookup={'guid': old_id})

    new_versions = []
    for old_version in old_versions:
        old_version[version_id] = new_doc[config.ID_FIELD]
        del old_version[config.ID_FIELD]

        old_version['guid'] = new_doc['guid']
        old_version['unique_name'] = new_doc['unique_name']
        old_version['unique_id'] = new_doc['unique_id']
        old_version['versioncreated'] = utcnow()
        if old_version[VERSION] == new_doc[VERSION]:
            old_version[ITEM_OPERATION] = new_doc[ITEM_OPERATION]
        new_versions.append(old_version)

    last_version = deepcopy(new_doc)
    last_version['_id_document'] = new_doc['_id']
    del last_version['_id']
    new_versions.append(last_version)

    if new_versions:
        get_resource_service('archive_versions').post(new_versions)
def test_products(self, article):
    req = ParsedRequest()
    results = []
    products = list(get_resource_service('products').get(req=req, lookup=None))
    for product in products:
        result = {}
        result['product_id'] = product['_id']
        result['matched'] = True
        reason = ''

        if not EnqueueService().conforms_product_targets(product, article):
            # Here it fails to match due to geo restriction
            # story has target_region and product has geo restriction
            result['matched'] = False

            if BasePublishService().is_targeted(article, 'target_regions'):
                reason = 'Story has target_region'

            if product.get('geo_restrictions'):
                reason = '{} {}'.format(reason, 'Product has target_region')

        if not EnqueueService().conforms_content_filter(product, article):
            # Here it fails to match due to content filter
            content_filter = product.get('content_filter')
            filter = get_resource_service('content_filters').find_one(req=None, _id=content_filter['filter_id'])
            result['matched'] = False
            reason = 'Story does not match the filter: {}'.format(filter.get('name'))

        result['reason'] = reason
        results.append(result)

    return results
def get_provider_routing_scheme(provider):
    """Returns the ingest provider's routing scheme configuration.

    If the provider has a routing scheme defined (i.e. the scheme ID is not None),
    the scheme is fetched from the database. If not, nothing is returned.

    For all scheme rules that have a reference to a content filter defined,
    that filter's configuration is fetched from the database as well and
    embedded into the corresponding scheme rule.

    :param dict provider: ingest provider configuration

    :return: fetched provider's routing scheme configuration (if any)
    :rtype: dict or None
    """
    if not provider.get('routing_scheme'):
        return None

    schemes_service = superdesk.get_resource_service('routing_schemes')
    filters_service = superdesk.get_resource_service('content_filters')

    scheme = schemes_service.find_one(_id=provider['routing_scheme'], req=None)

    # for those routing rules that have a content filter defined,
    # get that filter from DB and embed it into the rule...
    rules_filters = (
        (rule, str(rule['filter']))
        for rule in scheme['rules'] if rule.get('filter'))

    for rule, filter_id in rules_filters:
        content_filter = filters_service.find_one(_id=filter_id, req=None)
        rule['filter'] = content_filter

    return scheme
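# A minimal usage sketch for the helper above, assuming a provider document shaped like
# the ones used elsewhere in this section (the _id value below is made up for illustration):
#
#   provider = superdesk.get_resource_service('ingest_providers').find_one(
#       req=None, _id='57e0ed55a5b2ad2a0f8e2b4c')
#   scheme = get_provider_routing_scheme(provider)
#   if scheme:
#       for rule in scheme['rules']:
#           # rule['filter'] now holds the embedded content filter document (or None)
#           print(rule.get('name'), rule.get('filter'))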
def test_expiring_with_content(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2014_newsml_KBN0FL0NM:10'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self._get_provider_service(provider)
    provider_service.provider = provider
    provider_service.URL = provider.get('config', {}).get('url')
    items = provider_service.fetch_ingest(guid)

    now = utcnow()
    for i, item in enumerate(items):
        item['ingest_provider'] = provider['_id']
        expiry_time = now - timedelta(hours=11)
        if i > 4:
            expiry_time = now + timedelta(minutes=11)
        item['expiry'] = item['versioncreated'] = expiry_time

    service = get_resource_service('ingest')
    service.post(items)  # ingest the items and expire them

    before = service.get(req=None, lookup={})
    self.assertEqual(6, before.count())

    remove = RemoveExpiredContent()
    remove.run(provider.get('type'))

    # only one left in ingest
    after = service.get(req=None, lookup={})
    self.assertEqual(1, after.count())
def create(self, docs, **kwargs):
    doc = docs[0] if len(docs) > 0 else {}
    original_id = request.view_args['original_id']
    update_document = doc.get('update')

    archive_service = get_resource_service(ARCHIVE)
    original = archive_service.find_one(req=None, _id=original_id)
    self._validate_rewrite(original, update_document)

    digital = TakesPackageService().get_take_package(original)
    rewrite = self._create_rewrite_article(original, digital,
                                           existing_item=update_document,
                                           desk_id=doc.get('desk_id'))

    if update_document:
        # process the existing story
        archive_service.patch(update_document[config.ID_FIELD], rewrite)
        rewrite[config.ID_FIELD] = update_document[config.ID_FIELD]
        ids = [update_document[config.ID_FIELD]]
    else:
        ids = archive_service.post([rewrite])
        build_custom_hateoas(CUSTOM_HATEOAS, rewrite)

    self._add_rewritten_flag(original, digital, rewrite)
    get_resource_service('archive_broadcast').on_broadcast_master_updated(ITEM_CREATE, item=original,
                                                                          rewrite_id=ids[0])

    return [rewrite]
def run(self, republish):
    # update themes
    theme_service = get_resource_service('themes')
    created, updated = theme_service.update_registered_theme_with_local_files()
    print('\n* %d themes updated from local files\n' % (len(created) + len(updated)))

    # retrieves all opened blogs
    blogs_service = get_resource_service('blogs')
    blogs = blogs_service.get(req=None, lookup=dict(blog_status='open'))

    print('* Update the theme for every blog\n')
    for blog in blogs:
        theme = blogs_service.get_theme_snapshot(blog['blog_preferences']['theme'])
        try:
            blogs_service.system_update(ObjectId(blog['_id']), {'theme': theme}, blog)
        except eve.io.base.DataLayer.OriginalChangedError:
            print(u'! an error occurred during saving blog "%s".' % (blog['title']),
                  'Can be a broken relationship (with user for instance)')
        else:
            print('- Blog "%s"\'s theme was updated to %s %s' % (
                blog['title'], theme['name'], theme['version']))

    # republish on s3
    if republish:
        print('\n* Republishing blogs:\n')
        for blog in blogs:
            url = publish_blog_embed_on_s3(blog_id=str(blog['_id']), safe=False)
            print(' - Blog "%s" republished: %s' % (blog['title'], url))
def upload_fixture_image(
        self, fixture_image_path, verification_stats_path, verification_result_path, headline='test'):
    with self.app.app_context():
        with open(fixture_image_path, mode='rb') as f:
            file_name = ntpath.basename(fixture_image_path)
            file_type = 'image'
            content_type = '%s/%s' % (file_type, imghdr.what(f))
            file_id = app.media.put(
                f, filename=file_name, content_type=content_type,
                resource=get_resource_service('ingest').datasource, metadata={})
            inserted = [file_id]
            renditions = generate_renditions(
                f, file_id, inserted, file_type, content_type,
                rendition_config=config.RENDITIONS['picture'], url_for_media=url_for_media)

        data = [{
            'headline': headline,
            'slugline': 'rebuild',
            'renditions': renditions,
            'type': 'picture'
        }]
        image_id = get_resource_service('ingest').post(data)

        with open(verification_result_path, 'r') as f:
            self.expected_verification_results.append(json.load(f))

        with open(verification_stats_path, 'r') as f:
            self.expected_verification_stats.append(json.load(f))

        return image_id
def enhance_document_with_user_privileges(self, session_doc, user_doc):
    role_doc = get_resource_service('users').get_role(user_doc)
    get_resource_service('users').set_privileges(user_doc, role_doc)
    session_doc[_privileges_key] = user_doc.get(_privileges_key, {})

    # set last_updated to max for session/user/role so that client will fetch changes
    # after a change to any of those
    session_doc[app.config['LAST_UPDATED']] = last_updated(session_doc, user_doc, role_doc)
def find_one(self, req, **lookup):
    session = get_resource_service('sessions').find_one(req=None, _id=lookup['_id'])
    _id = session['user'] if session else lookup['_id']
    doc = get_resource_service('users').find_one(req, _id=_id)
    if doc:
        doc['_id'] = session['_id'] if session else _id
    return doc
def transmit_items(queue_items, subscriber, destination, output_channels):
    failed_items = []

    for queue_item in queue_items:
        # Check if output channel is active
        if not (output_channels.get(str(queue_item['output_channel_id']), {})).get('is_active', False):
            continue

        try:
            if not is_on_time(queue_item, destination):
                continue

            # update the status of the item to in-progress
            queue_update = {'state': 'in-progress', 'transmit_started_at': utcnow()}
            superdesk.get_resource_service('publish_queue').patch(queue_item.get('_id'), queue_update)

            # get the formatted item
            formatted_item = superdesk.get_resource_service('formatted_item').\
                find_one(req=None, _id=queue_item['formatted_item_id'])

            transmitter = superdesk.publish.transmitters[destination.get('delivery_type')]
            transmitter.transmit(queue_item, formatted_item, subscriber, destination)
            update_content_state(queue_item)
        except:
            failed_items.append(queue_item)

    if len(failed_items) > 0:
        logger.error('Failed to publish the following items: %s', str(failed_items))
def unlock(self, item_filter, user_id, session_id, etag):
    item_model = get_model(ItemModel)
    item = item_model.find_one(item_filter)

    if not item:
        raise SuperdeskApiError.notFoundError()

    if not item.get(LOCK_USER):
        raise SuperdeskApiError.badRequestError(message="Item is not locked.")

    can_user_unlock, error_message = self.can_unlock(item, user_id)

    if can_user_unlock:
        self.app.on_item_unlock(item, user_id)

        # delete the item if nothing is saved so far
        # version 0 created on lock item
        if item.get(config.VERSION, 0) == 0 and item[ITEM_STATE] == CONTENT_STATE.DRAFT:
            superdesk.get_resource_service('archive').delete_action(lookup={'_id': item['_id']})
            push_content_notification([item])
        else:
            updates = {LOCK_USER: None, LOCK_SESSION: None, 'lock_time': None, 'force_unlock': True}
            item_model.update(item_filter, updates)
            self.app.on_item_unlocked(item, user_id)

        push_notification('item:unlock',
                          item=str(item_filter.get(config.ID_FIELD)),
                          item_version=str(item.get(config.VERSION)),
                          state=item.get(ITEM_STATE),
                          user=str(user_id),
                          lock_session=str(session_id))
    else:
        raise SuperdeskApiError.forbiddenError(message=error_message)

    item = item_model.find_one(item_filter)
    return item
def update(self, id, updates, original):
    original_state = original[config.CONTENT_STATE]
    if not is_workflow_state_transition_valid("spike", original_state):
        raise InvalidStateTransitionError()

    package_service = PackageService()
    user = get_user(required=True)

    item = get_resource_service(ARCHIVE).find_one(req=None, _id=id)
    expiry_minutes = app.settings["SPIKE_EXPIRY_MINUTES"]

    # check if the item is on a desk; if so, use the desk's spike_expiry
    if is_assigned_to_a_desk(item):
        desk = get_resource_service("desks").find_one(_id=item["task"]["desk"], req=None)
        expiry_minutes = desk.get("spike_expiry", expiry_minutes)

    updates[EXPIRY] = get_expiry_date(expiry_minutes)
    updates[REVERT_STATE] = item.get(app.config["CONTENT_STATE"], None)

    if original.get("rewrite_of"):
        updates["rewrite_of"] = None

    item = self.backend.update(self.datasource, id, updates, original)
    push_notification("item:spike", item=str(item.get("_id")), user=str(user))
    package_service.remove_spiked_refs_from_package(id)
    return item
def restore_version(self, id, doc):
    item_id = id
    old_version = int(doc.get('old_version', 0))
    last_version = int(doc.get('last_version', 0))
    if (not all([item_id, old_version, last_version])):
        return None

    old = get_resource_service('archive_versions').find_one(req=None, _id_document=item_id,
                                                            _version=old_version)
    if old is None:
        raise SuperdeskApiError.notFoundError('Invalid version %s' % old_version)

    curr = get_resource_service(SOURCE).find_one(req=None, _id=item_id)
    if curr is None:
        raise SuperdeskApiError.notFoundError('Invalid item id %s' % item_id)

    if curr[config.VERSION] != last_version:
        raise SuperdeskApiError.preconditionFailedError('Invalid last version %s' % last_version)

    old['_id'] = old['_id_document']
    old['_updated'] = old['versioncreated'] = utcnow()
    set_item_expiry(old, doc)
    del old['_id_document']

    resolve_document_version(old, 'archive', 'PATCH', curr)
    remove_unwanted(old)
    res = super().replace(id=item_id, document=old)

    del doc['old_version']
    del doc['last_version']
    doc.update(old)
    return res
def lock(self, item_filter, user_id, session_id, etag):
    item_model = get_model(ItemModel)
    item = item_model.find_one(item_filter)

    if not item:
        raise SuperdeskApiError.notFoundError()

    can_user_lock, error_message = self.can_lock(item, user_id, session_id)

    if can_user_lock:
        self.app.on_item_lock(item, user_id)
        updates = {LOCK_USER: user_id, LOCK_SESSION: session_id, 'lock_time': utcnow()}
        item_model.update(item_filter, updates)

        if item.get(TASK):
            item[TASK]['user'] = user_id
        else:
            item[TASK] = {'user': user_id}

        superdesk.get_resource_service('tasks').assign_user(item[config.ID_FIELD], item[TASK])
        self.app.on_item_locked(item, user_id)
        push_notification('item:lock',
                          item=str(item.get(config.ID_FIELD)),
                          item_version=str(item.get(config.VERSION)),
                          user=str(user_id),
                          lock_time=updates['lock_time'],
                          lock_session=str(session_id))
    else:
        raise SuperdeskApiError.forbiddenError(message=error_message)

    item = item_model.find_one(item_filter)
    return item
def delete(self, lookup):
    """
    Overriding to delete stages before deleting a desk
    """
    superdesk.get_resource_service('stages').delete(lookup={'desk': lookup.get(config.ID_FIELD)})
    super().delete(lookup)
def deschedule_item(self, updates, doc):
    updates['state'] = 'in_progress'
    updates['publish_schedule'] = None

    # delete entries from publish queue
    get_resource_service('publish_queue').delete_by_article_id(doc['_id'])

    # delete entry from published repo
    get_resource_service('published').delete_by_article_id(doc['_id'])
def test_files_dont_duplicate_ingest(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2014_newsml_KBN0FL0NM'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self.provider_services[provider.get('type')]
    provider_service.provider = provider

    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    service = get_resource_service('ingest')
    service.post(items)

    # ingest the items
    self.ingest_items(items, provider)

    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    # ingest them again
    self.ingest_items(items, provider)

    # 12 files in grid fs
    current_files = self.app.media.fs('upload').find()
    self.assertEqual(12, current_files.count())
def test_subject_to_anpa_category_derived_ingest_ignores_inactive_map_entries(self):
    vocab = [{'_id': 'iptc_category_map',
              'items': [{'name': 'Finance', 'category': 'f', 'subject': '04000000', 'is_active': False}]},
             {'_id': 'categories',
              'items': [{'is_active': True, 'name': 'Australian Weather', 'qcode': 'b', 'subject': '17000000'}]}]

    self.app.data.insert('vocabularies', vocab)
    provider_name = 'AAP'
    guid = 'nitf-fishing.xml'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self.provider_services[provider.get('type')]
    provider_service.provider = provider

    items = provider_service.parse_file(guid, provider)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    service = get_resource_service('ingest')
    service.post(items)

    # ingest the items and check the subject code has been derived
    self.ingest_items(items, provider)
    self.assertNotIn('anpa_category', items[0])
def get_expiry(desk_id, stage_id, offset=None):
    """
    Calculates the expiry for content by fetching the expiry duration from one of the below:

    1. desk identified by desk_id
    2. stage identified by stage_id

    :param desk_id: desk identifier
    :param stage_id: stage identifier
    :return: when the doc will expire
    """
    stage = None
    desk = None

    if desk_id:
        desk = superdesk.get_resource_service('desks').find_one(req=None, _id=desk_id)

        if not desk:
            raise SuperdeskApiError.notFoundError('Invalid desk identifier %s' % desk_id)

    if stage_id:
        stage = get_resource_service('stages').find_one(req=None, _id=stage_id)

        if not stage:
            raise SuperdeskApiError.notFoundError('Invalid stage identifier %s' % stage_id)

    return get_item_expiry(desk, stage, offset)
def get_expiry(desk_id=None, stage_id=None, desk_or_stage_doc=None):
    """
    Calculates the expiry for content by fetching the expiry duration from one of the below:

    1. desk identified by desk_id
    2. stage identified by stage_id. This will ignore desk_id if specified
    3. desk doc or stage doc passed as desk_or_stage_doc. This will ignore desk_id and stage_id if specified

    :param desk_id: desk identifier
    :param stage_id: stage identifier
    :param desk_or_stage_doc: doc from either the desks collection or the stages collection
    :return: when the doc will expire
    """
    stage = None

    if desk_or_stage_doc is None and desk_id:
        desk = superdesk.get_resource_service('desks').find_one(req=None, _id=desk_id)

        if not desk:
            raise SuperdeskApiError.notFoundError('Invalid desk identifier %s' % desk_id)

        if not stage_id:
            stage = get_resource_service('stages').find_one(req=None, _id=desk['incoming_stage'])

            if not stage:
                raise SuperdeskApiError.notFoundError('Invalid stage identifier %s' % stage_id)

    if desk_or_stage_doc is None and stage_id:
        stage = get_resource_service('stages').find_one(req=None, _id=stage_id)

        if not stage:
            raise SuperdeskApiError.notFoundError('Invalid stage identifier %s' % stage_id)

    return get_item_expiry(app=app, stage=desk_or_stage_doc or stage)
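# A minimal usage sketch for the get_expiry variant above, assuming the identifiers and
# the stage_doc already exist in the 'desks'/'stages' collections (values are illustrative):
#
#   expiry = get_expiry(desk_id='5d385f17fe985ec5e1a78b49')    # falls back to the desk's incoming stage
#   expiry = get_expiry(stage_id='5d385f31fe985ec67a0ca583')   # desk_id is ignored when a stage is given
#   expiry = get_expiry(desk_or_stage_doc=stage_doc)           # pass a desk/stage document directly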
def _validate_disable(self, updates, original):
    """
    Checks the templates and desks that reference the given content profile if the profile is being disabled
    """
    if 'enabled' in updates and updates.get('enabled') is False and original.get('enabled') is True:
        templates = list(superdesk.get_resource_service('content_templates').
                         get_templates_by_profile_id(original.get('_id')))

        if len(templates) > 0:
            template_names = ', '.join([t.get('template_name') for t in templates])
            raise SuperdeskApiError.badRequestError(
                message='Cannot disable content profile as the following templates are referencing it: {}'.
                format(template_names))

        req = ParsedRequest()
        all_desks = list(superdesk.get_resource_service('desks').get(req=req, lookup={}))
        profile_desks = [desk for desk in all_desks
                         if desk.get('default_content_profile') == str(original.get('_id'))]

        if len(profile_desks) > 0:
            profile_desk_names = ', '.join([d.get('name') for d in profile_desks])
            raise SuperdeskApiError.badRequestError(
                message='Cannot disable content profile as the following desks are referencing it: {}'.
                format(profile_desk_names))
def queue_transmission(self, doc, subscribers):
    """
    Formats and then queues the article for transmission to the passed subscribers.

    ::Important Note:: Format Type across Subscribers can repeat. But we can't have the formatted item generated
    once based on the format_types configured across all the subscribers, as the formatted item must have a
    published sequence number generated per Subscriber.

    :param dict doc: document to queue for transmission
    :param list subscribers: List of subscriber dict.
    :return: (list, bool) tuple of the list of missing formatters and a boolean flag. True if queued else False.
    """
    try:
        queued = False
        no_formatters = []
        for subscriber in subscribers:
            try:
                if doc[ITEM_TYPE] not in [CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED] and \
                        subscriber.get('subscriber_type', '') == SUBSCRIBER_TYPES.WIRE:
                    # wire subscribers can get only text and preformatted stories
                    continue

                for destination in subscriber['destinations']:
                    # Step 2(a)
                    formatter = get_formatter(destination['format'], doc)

                    if not formatter:  # if formatter not found then record it
                        no_formatters.append(destination['format'])
                        continue

                    formatted_docs = formatter.format(doc, subscriber)

                    for pub_seq_num, formatted_doc in formatted_docs:
                        publish_queue_item = dict()
                        publish_queue_item['item_id'] = doc['_id']
                        publish_queue_item['item_version'] = doc[config.VERSION]
                        publish_queue_item['formatted_item'] = formatted_doc
                        publish_queue_item['subscriber_id'] = subscriber['_id']
                        publish_queue_item['destination'] = destination
                        publish_queue_item['published_seq_num'] = pub_seq_num
                        publish_queue_item['publish_schedule'] = doc.get('publish_schedule', None)
                        publish_queue_item['unique_name'] = doc.get('unique_name', None)
                        publish_queue_item['content_type'] = doc.get('type', None)
                        publish_queue_item['headline'] = doc.get('headline', None)

                        self.set_state(doc, publish_queue_item)
                        if publish_queue_item.get(ITEM_STATE):
                            publish_queue_item['publishing_action'] = publish_queue_item.get(ITEM_STATE)
                            del publish_queue_item[ITEM_STATE]
                        else:
                            publish_queue_item['publishing_action'] = self.published_state

                        get_resource_service('publish_queue').post([publish_queue_item])
                        queued = True
            except:
                logger.exception("Failed to queue item for id {} with headline {} for subscriber {}."
                                 .format(doc.get(config.ID_FIELD), doc.get('headline'), subscriber.get('name')))

        return no_formatters, queued
    except:
        raise
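# A minimal usage sketch for queue_transmission above, assuming `doc` is a publishable
# item and subscribers are loaded from the 'subscribers' resource (the lookup used below
# is an assumption for illustration):
#
#   subscribers = list(get_resource_service('subscribers').get(req=None, lookup={'is_active': True}))
#   no_formatters, queued = self.queue_transmission(doc, subscribers)
#   if no_formatters:
#       logger.warning('No formatter found for formats: %s', ', '.join(no_formatters))
#   if not queued:
#       logger.warning('Nothing was queued for item %s', doc.get('_id'))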
def test_subject_to_anpa_category_derived_ingest_ignores_inactive_map_entries(self):
    vocab = [{'_id': 'iptc_category_map',
              'items': [{'name': 'Finance', 'category': 'f', 'subject': '04000000', 'is_active': False}]},
             {'_id': 'categories',
              'items': [{'is_active': True, 'name': 'Australian Weather', 'qcode': 'b', 'subject': '17000000'}]}]

    self.app.data.insert('vocabularies', vocab)
    provider_name = 'AAP'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    file_path = os.path.join(provider.get('config', {}).get('path', ''), 'nitf-fishing.xml')
    provider_service = self._get_provider_service(provider)
    feeding_parser = provider_service.get_feed_parser(provider)
    with open(file_path, 'r') as f:
        xml_string = etree.etree.fromstring(f.read())
        items = [feeding_parser.parse(xml_string, provider)]

    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    service = get_resource_service('ingest')
    service.post(items)

    # ingest the items and check the subject code has been derived
    self.ingest_items(items, provider)
    self.assertNotIn('anpa_category', items[0])
def on_delete(self, doc):
    """
    Checks if deleting the stage would not violate data integrity, raises an exception if it does.

    1/ Can't delete the default incoming stage
    2/ The stage must have no documents (spiked or unspiked)
    3/ The stage can not be referred to by an ingest routing rule

    :param doc:
    :return:
    """
    if doc['default_incoming'] is True:
        desk_id = doc.get('desk', None)
        if desk_id and superdesk.get_resource_service('desks').find_one(req=None, _id=desk_id):
            raise SuperdeskApiError.preconditionFailedError(message='Cannot delete a default stage.')

    archive_versions_query = {'task.stage': str(doc[config.ID_FIELD])}
    items = superdesk.get_resource_service('archive_versions').get(req=None, lookup=archive_versions_query)
    if items and items.count():
        raise SuperdeskApiError.preconditionFailedError(
            message='Cannot delete stage as it has article(s) or is referenced by versions of article(s).')

    # check if the stage is referred to in an ingest routing rule
    rules = self._stage_in_rule(doc[config.ID_FIELD])
    if rules.count() > 0:
        rule_names = ', '.join(rule.get('name') for rule in rules)
        raise SuperdeskApiError.preconditionFailedError(
            message='Stage is referred to by Ingest Routing Schemes : {}'.format(rule_names))
def data_scaffolding_test(self):
    with self.app.app_context():
        command = AppInitializeWithDataCommand()
        result = command.run()
        self.assertEquals(result, 0)

        service = get_resource_service('text_archive')
        docs = [{
            'type': 'text',
            'abstract': 'test abstract {}'.format(x),
            'headline': 'test headline {}'.format(x),
            'body_html': 'test long story body {}'.format(x)
        } for x in range(0, 40)]
        service.post(docs)

        stories_per_desk = 2
        existing_desks = 1
        command = AppScaffoldDataCommand()
        result = command.run(stories_per_desk)
        self.assertEquals(result, 0)

        cursor = get_resource_service('desks').get_from_mongo(None, {})
        self.assertEquals(cursor.count(), existing_desks)

        cursor = get_resource_service('archive').get_from_mongo(None, {})
        self.assertEquals(cursor.count(), existing_desks * stories_per_desk)
def on_deleted(self, doc):
    """
    Overriding to clean up reset password tokens.
    """
    super().on_deleted(doc)
    get_resource_service('reset_user_password').remove_all_tokens_for_email(doc.get('email'))
def test_expiring_content_with_files(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2014_newsml_KBN0FL0NM'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self.provider_services[provider.get('type')]
    provider_service.provider = provider

    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']

    now = utcnow()
    items[0]['expiry'] = now - timedelta(hours=11)
    items[1]['expiry'] = now - timedelta(hours=11)
    items[2]['expiry'] = now + timedelta(hours=11)
    items[5]['versioncreated'] = now + timedelta(minutes=11)

    service = get_resource_service('ingest')
    service.post(items)

    # ingest the items and expire them
    self.ingest_items(items, provider)

    # four files in grid fs
    current_files = self.app.media.fs('upload').find()
    self.assertEqual(4, current_files.count())

    remove = RemoveExpiredContent()
    remove.run(provider.get('type'))

    # all gone
    current_files = self.app.media.fs('upload').find()
    self.assertEqual(0, current_files.count())
def on_delete(self, doc):
    """Runs on delete of archive item.

    Overriding to validate that the item being killed is actually eligible for kill. Validates the following:
        1. Is the item of type Text?
        2. Is the item a Broadcast Script?
        3. Does the item act as a Master Story for any of the existing broadcasts?
        4. Is the item available in production or part of a normal package?
        5. Is the associated Digital Story available in production or part of a normal package?
        6. If the item is a Take, is any take available in production or part of a normal package?

    :param doc: represents the article in archived collection
    :type doc: dict
    :raises SuperdeskApiError.badRequestError() if any of the above validation conditions fail.
    """
    bad_req_error = SuperdeskApiError.badRequestError

    id_field = doc[config.ID_FIELD]
    item_id = doc['item_id']

    doc['item_id'] = id_field
    doc[config.ID_FIELD] = item_id

    if doc[ITEM_TYPE] != CONTENT_TYPE.TEXT:
        raise bad_req_error(message='Only Text articles are allowed to Kill in Archived repo')

    if is_genre(doc, BROADCAST_GENRE):
        raise bad_req_error(message="Killing of Broadcast Items isn't allowed in Archived repo")

    if get_resource_service('archive_broadcast').get_broadcast_items_from_master_story(doc, True):
        raise bad_req_error(message="Can't kill as this article acts as a Master Story for existing broadcast(s)")

    if get_resource_service(ARCHIVE).find_one(req=None, _id=doc[GUID_FIELD]):
        raise bad_req_error(message="Can't Kill as article is still available in production")

    if is_item_in_package(doc):
        raise bad_req_error(message="Can't kill as article is part of a Package")

    takes_package_service = TakesPackageService()
    takes_package_id = takes_package_service.get_take_package_id(doc)
    if takes_package_id:
        if get_resource_service(ARCHIVE).find_one(req=None, _id=takes_package_id):
            raise bad_req_error(message="Can't Kill as the Digital Story is still available in production")

        req = ParsedRequest()
        req.sort = '[("%s", -1)]' % config.VERSION
        takes_package = list(self.get(req=req, lookup={'item_id': takes_package_id}))
        if not takes_package:
            raise bad_req_error(message='Digital Story of the article not found in Archived repo')

        takes_package = takes_package[0]
        if is_item_in_package(takes_package):
            raise bad_req_error(message="Can't kill as Digital Story is part of a Package")

        for takes_ref in takes_package_service.get_package_refs(takes_package):
            if takes_ref[RESIDREF] != doc[GUID_FIELD]:
                if get_resource_service(ARCHIVE).find_one(req=None, _id=takes_ref[RESIDREF]):
                    raise bad_req_error(message="Can't Kill as Take(s) are still available in production")

                take = list(self.get(req=None, lookup={'item_id': takes_ref[RESIDREF]}))
                if not take:
                    raise bad_req_error(message='One of Take(s) not found in Archived repo')

                if is_item_in_package(take[0]):
                    raise bad_req_error(message="Can't kill as one of Take(s) is part of a Package")

    doc['item_id'] = item_id
    doc[config.ID_FIELD] = id_field
def number_of_data_updates_applied(self):
    return get_resource_service('data_updates').find({}).count()
def get_expired_items(provider_id, ingest_collection):
    query_filter = get_query_for_expired_items(provider_id)
    return superdesk.get_resource_service(ingest_collection).get_from_mongo(lookup=query_filter, req=None)
def delete(id):
    """
    Deletes the user by given id
    """
    get_resource_service('users').delete({'_id': ObjectId(id)})
    return jsonify({'success': True}), 200
def step_impl_given_role(context, role_name):
    with context.app.test_request_context(context.app.config['URL_PREFIX']):
        role = get_resource_service('roles').find_one(name=role_name, req=None)
        data = json.dumps({'roles': [str(role['_id'])]})
    response = patch_current_user(context, data)
    assert_ok(response)
def step_impl_given_empty(context, resource):
    with context.app.test_request_context(context.app.config['URL_PREFIX']):
        get_resource_service(resource).delete_action()
def on_created(self, docs):
    for doc in docs:
        get_resource_service('preferences').set_session_based_prefs(doc['_id'], doc['user'])
def remove_old_default(self, desk, field):
    lookup = {'$and': [{field: True}, {'desk': str(desk)}]}
    stages = self.get(req=None, lookup=lookup)
    for stage in stages:
        get_resource_service('stages').update(stage.get('_id'), {field: False}, stage)
def get_stage_documents(self, stage_id):
    query_filter = superdesk.json.dumps({'term': {'task.stage': stage_id}})
    req = ParsedRequest()
    req.args = {'filter': query_filter}
    return superdesk.get_resource_service(ARCHIVE).get(req, None)
def update(self, id, updates, original):
    """Runs on update of archive item.

    Overriding to handle the Kill workflow in the Archived repo:
        1. Check if the article has an associated Digital Story and if the Digital Story has more Takes.
           If both the Digital Story and more Takes exist then all of them are killed along with the one requested.
        2. If the item is flagged as archived only, then it was never created by or published from the system,
           so all that needs to be done is to delete it and send an email to all subscribers.
        3. For each article being killed do the following:
            i.   Create an entry in archive, archive_versions and published collections.
            ii.  Query the Publish Queue in Legal Archive, find the subscribers who received the article
                 previously and create transmission entries in Publish Queue.
            iii. Change the state of the article to Killed in Legal Archive.
            iv.  Delete all the published versions from Archived.
            v.   Send a broadcast email to all subscribers.

    :param id: primary key of the item to be killed
    :type id: str
    :param updates: updates to be applied on the article before saving
    :type updates: dict
    :param original:
    :type original: dict
    """
    # Step 1
    articles_to_kill = self._find_articles_to_kill({'_id': id})
    logger.info('Fetched articles to kill for id: {}'.format(id))
    articles_to_kill.sort(key=itemgetter(ITEM_TYPE), reverse=True)  # Needed because package has to be inserted last
    kill_service = KillPublishService()

    updated = original.copy()

    for article in articles_to_kill:
        updates_copy = deepcopy(updates)
        kill_service.apply_kill_override(article, updates_copy)
        updated.update(updates_copy)

        # Step 2, If it is flagged as archived only it has no related items in the system so it can be deleted.
        # An email is sent to all subscribers
        if original.get('flags', {}).get('marked_archived_only', False):
            super().delete({'item_id': article['item_id']})
            logger.info('Delete for article: {}'.format(article[config.ID_FIELD]))
            kill_service.broadcast_kill_email(article, updates_copy)
            logger.info('Broadcast kill email for article: {}'.format(article[config.ID_FIELD]))
            continue

        # Step 3(i)
        self._remove_and_set_kill_properties(article, articles_to_kill, updated)
        logger.info('Removing and setting properties for article: {}'.format(article[config.ID_FIELD]))

        # Step 3(ii)
        transmission_details = list(
            get_resource_service(LEGAL_PUBLISH_QUEUE_NAME).get(req=None, lookup={'item_id': article['item_id']}))

        if transmission_details:
            subscriber_ids = [t['_subscriber_id'] for t in transmission_details]
            query = {'$and': [{config.ID_FIELD: {'$in': subscriber_ids}}]}
            subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))

            EnqueueKilledService().queue_transmission(article, subscribers)
            logger.info('Queued Transmission for article: {}'.format(article[config.ID_FIELD]))

        article[config.ID_FIELD] = article.pop('item_id', article['item_id'])

        # Step 3(iv)
        super().delete({'item_id': article[config.ID_FIELD]})
        logger.info('Delete for article: {}'.format(article[config.ID_FIELD]))

        # Step 3(i) - Creating entries in published collection
        docs = [article]
        get_resource_service(ARCHIVE).post(docs)
        insert_into_versions(doc=article)
        published_doc = deepcopy(article)
        published_doc[QUEUE_STATE] = PUBLISH_STATE.QUEUED
        get_resource_service('published').post([published_doc])
        logger.info('Insert into archive and published for article: {}'.format(article[config.ID_FIELD]))

        # Step 3(iii)
        import_into_legal_archive.apply_async(countdown=3, kwargs={'item_id': article[config.ID_FIELD]})
        logger.info('Legal Archive import for article: {}'.format(article[config.ID_FIELD]))

        # Step 3(v)
        kill_service.broadcast_kill_email(article, updates)
        logger.info('Broadcast kill email for article: {}'.format(article[config.ID_FIELD]))
def get(self, req, lookup):
    """
    Return the list of languages defined in the config file.
    """
    languages = superdesk.get_resource_service("vocabularies").get_languages()
    return ListCursor([view_language(lang) for lang in languages])
def clear_desk_ref(self, doc, field):
    desk = get_resource_service('desks').find_one(_id=doc.get('desk'), req=None)
    if desk:
        get_resource_service('desks').update(doc.get('desk'), {field: None}, desk)
def on_created(self, docs):
    for doc in docs:
        push_notification(self.notification_key, created=1, desk_id=str(doc.get(config.ID_FIELD)))
    get_resource_service("users").update_stage_visibility_for_users()
def _create_rewrite_article(self, original, existing_item=None, desk_id=None):
    """Creates a new story and sets the metadata from original.

    :param dict original: original story
    :param dict existing_item: existing story that is being re-written
    :return: new story
    """
    rewrite = dict()

    fields = ['family_id', 'event_id', 'flags', 'language', ASSOCIATIONS, 'extra']
    existing_item_preserve_fields = (ASSOCIATIONS, 'flags')

    if app.config.get('COPY_ON_REWRITE_FIELDS'):
        fields.extend(app.config['COPY_ON_REWRITE_FIELDS'])

    if existing_item:
        # when associating an existing item as the update, merge subjects
        subjects = original.get('subject', [])
        unique_subjects = {subject.get('qcode') for subject in subjects}
        rewrite['subject'] = [subject for subject in existing_item.get('subject', [])
                              if subject.get('qcode') not in unique_subjects]
        rewrite['subject'].extend(subjects)

        rewrite['flags'] = original['flags'] or {}
        # preserve flags
        for key in rewrite.get('flags').keys():
            rewrite['flags'][key] = original['flags'][key] or existing_item.get('flags', {}).get(key, False)

        original_associations = original.get(ASSOCIATIONS) or {}
        existing_associations = existing_item.get(ASSOCIATIONS) or {}
        rewrite[ASSOCIATIONS] = existing_associations

        # if the existing item has an association then preserve the association
        for key, assoc in original_associations.items():
            if not existing_associations.get(key):
                rewrite[ASSOCIATIONS][key] = assoc
    else:
        # ingest provider and source to be retained for new item
        fields.extend(['ingest_provider', 'source'])

    if original.get('profile'):
        content_type = get_resource_service('content_types').find_one(req=None, _id=original['profile'])
        extended_fields = list(content_type['schema'].keys())
        # extra fields needed.
        extended_fields.extend(['profile', 'keywords', 'target_regions', 'target_types', 'target_subscribers'])
    else:
        extended_fields = [
            'abstract', 'anpa_category', 'pubstatus', 'slugline', 'urgency',
            'subject', 'priority', 'byline', 'dateline', 'headline', 'place',
            'genre', 'body_footer', 'company_codes', 'keywords',
            'target_regions', 'target_types', 'target_subscribers'
        ]

    fields.extend(extended_fields)

    for field in fields:
        if original.get(field):
            # don't overwrite some fields in existing items
            if existing_item and field in existing_item_preserve_fields:
                continue
            rewrite[field] = original[field]

    # if the original was flagged for SMS the rewrite should not be.
    if not existing_item and rewrite.get('flags', {}).get('marked_for_sms', False):
        rewrite['flags']['marked_for_sms'] = False

    # SD-4595 - Default value for the update article to be set based on the system config.
    if config.RESET_PRIORITY_VALUE_FOR_UPDATE_ARTICLES:
        # if True then reset to the default priority value.
        rewrite['priority'] = int(config.DEFAULT_PRIORITY_VALUE_FOR_MANUAL_ARTICLES)

    rewrite['rewrite_of'] = original[config.ID_FIELD]
    rewrite['rewrite_sequence'] = (original.get('rewrite_sequence') or 0) + 1
    rewrite.pop(PROCESSED_FROM, None)

    if not existing_item:
        # send the document to the desk only if a new rewrite is created
        send_to(doc=rewrite, desk_id=(desk_id or original['task']['desk']),
                default_stage='working_stage', user_id=get_user_id())

        # if we are rewriting a published item then copy the body_html
        if original.get('state', '') in (CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                                         CONTENT_STATE.SCHEDULED):
            rewrite['body_html'] = original.get('body_html', '')

    rewrite[ITEM_STATE] = CONTENT_STATE.PROGRESS
    self._set_take_key(rewrite)
    return rewrite
def link_archive_items_to_assignments(self, assignment, related_items, actioned_item, doc):
    assignments_service = get_resource_service('assignments')
    delivery_service = get_resource_service('delivery')
    assignments_service.validate_assignment_action(assignment)
    already_completed = assignment['assigned_to']['state'] == ASSIGNMENT_WORKFLOW_STATE.COMPLETED

    items = []
    ids = []
    deliveries = []
    published_updated_items = []
    updates = {'assigned_to': deepcopy(assignment.get('assigned_to'))}
    need_complete = None
    for item in related_items:
        if not item.get('assignment_id') or (item['_id'] == actioned_item.get('_id') and doc.get('force')):
            # Update the delivery for the item if one exists
            delivery = delivery_service.find_one(req=None, item_id=item[config.ID_FIELD])
            if delivery:
                delivery_service.patch(delivery['_id'], {
                    'assignment_id': assignment['_id'],
                    'scheduled_update_id': assignment.get('scheduled_update_id'),
                })
            else:
                # Add a delivery for the item
                deliveries.append({
                    'item_id': item[config.ID_FIELD],
                    'assignment_id': assignment.get(config.ID_FIELD),
                    'planning_id': assignment['planning_item'],
                    'coverage_id': assignment['coverage_item'],
                    'item_state': item.get('state'),
                    'sequence_no': item.get('rewrite_sequence') or 0,
                    'publish_time': get_delivery_publish_time(item),
                    'scheduled_update_id': assignment.get('scheduled_update_id'),
                })

            # Update archive/published collection with assignment linking
            update_assignment_on_link_unlink(assignment[config.ID_FIELD], item, published_updated_items)

            ids.append(item.get(config.ID_FIELD))
            items.append(item)

            if item.get(ITEM_STATE) in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED] and \
                    not assignment.get('scheduled_update_id') and \
                    assignment['assigned_to']['state'] != ASSIGNMENT_WORKFLOW_STATE.COMPLETED:
                # If assignment belongs to coverage, 'complete' it if any news item is published
                need_complete = True

    # Create all deliveries
    if len(deliveries) > 0:
        delivery_service.post(deliveries)

    self.update_assignment(updates, assignment, actioned_item, doc.pop('reassign', None),
                           already_completed, need_complete)
    actioned_item['assignment_id'] = assignment[config.ID_FIELD]
    doc.update(actioned_item)

    # Save assignment history
    # Update assignment history with all items affected
    if len(ids) > 0:
        updates['assigned_to']['item_ids'] = ids
        if not assignment.get('scheduled_update_id'):
            assignment_history_service = get_resource_service('assignments_history')
            assignment_history_service.on_item_content_link(updates, assignment)

        if (actioned_item.get(ITEM_STATE) not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED] or
                already_completed) and not need_complete:
            # publishing planning item
            assignments_service.publish_planning(assignment['planning_item'])

    # Send notifications
    push_content_notification(items)
    push_notification('content:link',
                      item=str(actioned_item[config.ID_FIELD]),
                      assignment=assignment[config.ID_FIELD])

    return ids
def _users_aggregation(self, desk_id: str) -> List[Dict]:
    desks_service = superdesk.get_resource_service("desks")

    es_query: Dict[str, Any]
    es_assign_query: Dict[str, Any]
    desk_filter: Dict[str, Any]

    if desk_id == "all":
        desk_filter = {}
        es_query = {}
    else:
        desk_filter = {"_id": ObjectId(desk_id)}
        es_query = {"filter": [{"term": {"task.desk": desk_id}}]}

    req = ParsedRequest()
    req.projection = json.dumps({"members": 1})
    found = desks_service.get(req, desk_filter)
    members = set()
    for d in found:
        members.update({m["user"] for m in d["members"]})

    users_aggregation = app.data.pymongo().db.users.aggregate([
        {"$match": {"_id": {"$in": list(members)}}},
        {"$group": {"_id": "$role", "authors": {"$addToSet": "$_id"}}},
    ])

    # only do aggregations on content accessible by the user
    content_filters = superdesk.get_resource_service("search").get_archive_filters()
    if content_filters:
        es_query.setdefault("filter", []).extend(content_filters)

    # first we check archive for locked items
    es_query["aggs"] = {
        "desk_authors": {
            "filter": {
                "bool": {
                    "filter": {"terms": {"lock_user": [str(m) for m in members]}}
                }
            },
            "aggs": {
                "authors": {
                    "terms": {"field": "lock_user", "size": SIZE_MAX},
                    "aggs": {
                        "locked": {"filter": {"exists": {"field": "lock_user"}}},
                    },
                }
            },
        }
    }

    docs_agg = app.data.elastic.search(es_query, "archive", params={"size": 0})
    stats_by_authors = {}
    for a in docs_agg.hits["aggregations"]["desk_authors"]["authors"]["buckets"]:
        stats_by_authors[a["key"]] = {
            "locked": a["locked"]["doc_count"],
            "assigned": 0,
        }

    # then assignments
    if desk_id == "all":
        desk_filter = {}
        es_assign_query = {}
    else:
        desk_filter = {"_id": ObjectId(desk_id)}
        es_assign_query = {"filter": {"term": {"assigned_to.desk": desk_id}}}

    es_assign_query["aggs"] = {
        "desk_authors": {
            "filter": {"terms": {"assigned_to.user": [str(m) for m in members]}},
            "aggs": {
                "authors": {
                    "terms": {"field": "assigned_to.user", "size": SIZE_MAX},
                }
            },
        }
    }

    try:
        assign_agg = app.data.elastic.search(es_assign_query, "assignments", params={"size": 0})
    except KeyError:
        logger.warning('Can\'t access "assignments" collection, planning is probably not installed')
    else:
        for a in assign_agg.hits["aggregations"]["desk_authors"]["authors"]["buckets"]:
            stats_by_authors.setdefault(a["key"], {"locked": 0})["assigned"] = a["doc_count"]

    overview = []
    for a in users_aggregation:
        role = a["_id"]
        authors_dict: Dict[str, Any] = {}
        role_dict = {
            "role": role,
            "authors": authors_dict,
        }
        authors = a["authors"]
        for author in authors:
            author = str(author)
            try:
                authors_dict[author] = stats_by_authors[author]
            except KeyError:
                logger.debug("No article found for {author}".format(author=author))
                authors_dict[author] = {"assigned": 0, "locked": 0}
        overview.append(role_dict)

    return overview
def init_default_content_profile(doc):
    if not doc.get('profile'):
        desk_id = doc.get('task', {}).get('desk')
        desk = get_resource_service('desks').find_one(req=None, _id=desk_id)
        doc['profile'] = desk.get('default_content_profile')
def setUp(self):
    try:
        from apps.legal_archive.commands import ImportLegalArchiveCommand
    except ImportError:
        self.fail("Could not import class under test (ImportLegalArchiveCommand).")
    else:
        self.class_under_test = ImportLegalArchiveCommand
        self.app.data.insert("desks", self.desks)
        self.app.data.insert("users", self.users)
        self.validators = [
            {"schema": {}, "type": "text", "act": "publish", "_id": "publish_text"},
            {"schema": {}, "type": "text", "act": "correct", "_id": "correct_text"},
            {"schema": {}, "type": "text", "act": "kill", "_id": "kill_text"},
        ]
        self.products = [
            {"_id": "1", "name": "prod1"},
            {"_id": "2", "name": "prod2", "codes": "abc,def"},
            {"_id": "3", "name": "prod3", "codes": "xyz"},
        ]
        self.subscribers = [
            {
                "name": "Test",
                "is_active": True,
                "subscriber_type": "wire",
                "email": "*****@*****.**",
                "sequence_num_settings": {"max": 9999, "min": 1},
                "products": ["1"],
                "destinations": [
                    {
                        "name": "test",
                        "delivery_type": "email",
                        "format": "nitf",
                        "config": {"recipients": "*****@*****.**"},
                    }
                ],
            }
        ]
        self.app.data.insert("validators", self.validators)
        self.app.data.insert("products", self.products)
        self.app.data.insert("subscribers", self.subscribers)
        self.class_under_test = ImportLegalArchiveCommand
        self.archive_items = [
            {
                "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
                "_id": "item1",
                "state": "in_progress",
                "headline": "item 1",
                "type": "text",
                "slugline": "item 1 slugline",
                "_current_version": 1,
                "_created": utcnow() - timedelta(minutes=3),
                "expired": utcnow() - timedelta(minutes=30),
            },
            {
                "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
                "_id": "item2",
                "state": "in_progress",
                "headline": "item 2",
                "type": "text",
                "slugline": "item 2 slugline",
                "_current_version": 1,
                "_created": utcnow() - timedelta(minutes=2),
                "expired": utcnow() - timedelta(minutes=30),
            },
            {
                "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
                "_id": "item3",
                "state": "in_progress",
                "headline": "item 2",
                "type": "text",
                "slugline": "item 2 slugline",
                "_current_version": 1,
                "_created": utcnow() - timedelta(minutes=1),
                "expired": utcnow() - timedelta(minutes=30),
            },
        ]

        get_resource_service(ARCHIVE).post(self.archive_items)
        for item in self.archive_items:
            resolve_document_version(item, ARCHIVE, "POST")
            insert_into_versions(id_=item["_id"])
def import_file(self, entity_name, path, file_name, index_params, do_patch=False, force=False):
    """Imports seed data based on the entity_name (resource name) from the file_name specified.

    index_params is used to create indexes for that entity/resource.

    :param str entity_name: name of the resource
    :param str file_name: file name that contains seed data
    :param list index_params: list of indexes that is created on that entity.
        For example: [[("first_name", pymongo.ASCENDING), ("last_name", pymongo.ASCENDING)], "username"]
        will create two indexes:
            - composite index of "first_name", "last_name" fields.
            - index on the username field.
        Alternatively an index param can be specified as
        [[("first_name", pymongo.ASCENDING), ("last_name", pymongo.ASCENDING)], [("username", pymongo.ASCENDING)]]
        Refer to the pymongo create_index documentation for more information:
        http://api.mongodb.org/python/current/api/pymongo/collection.html
    :param bool do_patch: if True then patch the document else don't patch.
    """
    logger.info("Process %r", entity_name)
    file_path = file_name and get_filepath(file_name, path)
    if not file_path:
        pass
    elif not file_path.exists():
        logger.info(" - file not exists: %s", file_path)
    else:
        logger.info(" - got file path: %s", file_path)
        with file_path.open("rt", encoding="utf-8") as app_prepopulation:
            service = superdesk.get_resource_service(entity_name)
            json_data = json.loads(app_prepopulation.read())
            data = [fillEnvironmentVariables(item) for item in json_data]
            data = [app.data.mongo._mongotize(item, service.datasource) for item in data if item]
            existing_data = []
            existing = service.get_from_mongo(None, {})
            update_data = True

            if not do_patch and existing.count() > 0:
                logger.info(" - data already exists none will be loaded")
                update_data = False
            elif do_patch and existing.count() > 0:
                logger.info(" - data already exists it will be updated")

            if update_data:
                if do_patch:
                    for item in existing:
                        for loaded_item in data:
                            if "_id" in loaded_item and loaded_item["_id"] == item["_id"]:
                                data.remove(loaded_item)
                                if force or item.get("init_version", 0) < loaded_item.get("init_version", 0):
                                    existing_data.append(loaded_item)

                if data:
                    for item in data:
                        if not item.get(config.ETAG):
                            item.setdefault(config.ETAG, "init")
                    service.post(data)

                if existing_data and do_patch:
                    for item in existing_data:
                        item["_etag"] = "init"
                        service.update(item["_id"], item, service.find_one(None, _id=item["_id"]))

            logger.info(" - file imported successfully: %s", file_name)

    if index_params:
        for index in index_params:
            crt_index = list(index) if isinstance(index, list) else index
            options = crt_index.pop() if isinstance(crt_index[-1], dict) and isinstance(index, list) else {}
            collection = app.data.mongo.pymongo(resource=entity_name).db[entity_name]
            options.setdefault("background", True)
            index_name = collection.create_index(crt_index, **options)
            logger.info(" - index: %s for collection %s created successfully.", index_name, entity_name)
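# A minimal usage sketch for import_file above, assuming a "users" resource and a
# "users.json" seed file located under `path` (both names are illustrative assumptions):
#
#   import pymongo
#
#   index_params = [
#       [("first_name", pymongo.ASCENDING), ("last_name", pymongo.ASCENDING)],  # composite index
#       "username",                                                             # single-field index
#   ]
#   self.import_file("users", path, "users.json", index_params, do_patch=True)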
def _validate(self, doc):
    assignment = get_resource_service('assignments').find_one(req=None, _id=doc.get('assignment_id'))
    if not assignment:
        raise SuperdeskApiError.badRequestError('Assignment not found.')

    item = get_resource_service('archive').find_one(req=None, _id=doc.get('item_id'))
    if not item:
        raise SuperdeskApiError.badRequestError('Content item not found.')

    if not doc.get('force') and item.get('assignment_id'):
        raise SuperdeskApiError.badRequestError(
            'Content is already linked to an assignment. Cannot link assignment and content.')

    if not is_assigned_to_a_desk(item):
        raise SuperdeskApiError.badRequestError(
            'Content not in workflow. Cannot link assignment and content.')

    if not item.get('rewrite_of'):
        delivery = get_resource_service('delivery').find_one(
            req=None, assignment_id=ObjectId(doc.get('assignment_id')))
        if delivery:
            raise SuperdeskApiError.badRequestError(
                'Content already exists for the assignment. Cannot link assignment and content.')

        # scheduled update validation
        if assignment.get('scheduled_update_id'):
            raise SuperdeskApiError.badRequestError(
                'Only updates can be linked to a scheduled update assignment')

    coverage = get_coverage_for_assignment(assignment)
    allowed_states = [ASSIGNMENT_WORKFLOW_STATE.IN_PROGRESS, ASSIGNMENT_WORKFLOW_STATE.COMPLETED]
    if (coverage and len(coverage.get('scheduled_updates')) > 0 and
            str(assignment['_id']) != str((coverage.get('assigned_to') or {}).get('assignment_id'))):
        if (coverage.get('assigned_to') or {}).get('state') not in allowed_states:
            raise SuperdeskApiError('Previous coverage is not linked to content.')

        # Check all previous scheduled updates to be linked/completed
        for s in coverage.get('scheduled_updates'):
            assigned_to = (s.get('assigned_to') or {})
            if str(assigned_to.get('assignment_id')) == str(doc.get('assignment_id')):
                break

            if assigned_to.get('state') not in allowed_states:
                raise SuperdeskApiError('Previous scheduled-update pending content-linking/completion')
def generate_text_item(items, template_name, resource_type):
    template = get_resource_service('planning_export_templates').get_export_template(template_name, resource_type)
    archive_service = get_resource_service('archive')
    if not template:
        raise SuperdeskApiError.badRequestError('Invalid template selected')

    for item in items:
        # Create list of assignee with preference to coverage_provider, if not, assigned user
        item['published_archive_items'] = []
        item['assignees'] = []
        item['text_assignees'] = []
        item['contacts'] = []
        text_users = []
        text_desks = []
        users = []
        desks = []

        def enhance_coverage(planning, item, users):
            for c in (planning.get('coverages') or []):
                is_text = c.get('planning', {}).get('g2_content_type', '') == 'text'
                completed = (c.get('assigned_to') or {}).get('state') == ASSIGNMENT_WORKFLOW_STATE.COMPLETED
                assigned_to = c.get('assigned_to') or {}
                user = None
                desk = None
                if assigned_to.get('coverage_provider'):
                    item['assignees'].append(assigned_to['coverage_provider']['name'])
                    if is_text and not completed:
                        item['text_assignees'].append(assigned_to['coverage_provider']['name'])
                elif assigned_to.get('user'):
                    user = assigned_to['user']
                    users.append(user)
                elif assigned_to.get('desk'):
                    desk = assigned_to.get('desk')
                    desks.append(desk)

                # Get abstract from related text item if coverage is 'complete'
                if is_text:
                    if completed:
                        results = list(archive_service.get_from_mongo(req=None, lookup={
                            'assignment_id': ObjectId(c['assigned_to']['assignment_id']),
                            'state': {'$in': ['published', 'corrected']},
                            'pubstatus': 'usable',
                            'rewrite_of': None
                        }))
                        if len(results) > 0:
                            item['published_archive_items'].append({
                                'archive_text': get_first_paragraph_text(results[0].get('abstract')) or '',
                                'archive_slugline': results[0].get('slugline') or ''
                            })
                    elif c.get('news_coverage_status', {}).get('qcode') == 'ncostat:int':
                        if user:
                            text_users.append(user)
                        else:
                            text_desks.append(desk)

        item['contacts'] = get_contacts_from_item(item)
        if resource_type == 'planning':
            enhance_coverage(item, item, users)
        else:
            for p in (item.get('plannings') or []):
                enhance_coverage(p, item, users)

        users = get_resource_service('users').find(where={'_id': {'$in': users}})
        desks = get_resource_service('desks').find(where={'_id': {'$in': desks}})

        for u in users:
            name = "{0} {1}".format(u.get('last_name'), u.get('first_name'))
            item['assignees'].append(name)
            if str(u['_id']) in text_users:
                item['text_assignees'].append(name)

        for d in desks:
            item['assignees'].append(d['name'])
            if str(d['_id']) in text_desks:
                item['text_assignees'].append(d['name'])

        set_item_place(item)

        item['description_text'] = item.get('description_text') or (item.get('event') or {}).get('definition_short')
        item['slugline'] = item.get('slugline') or (item.get('event') or {}).get('name')

        # Handle dates and remote time-zones
        if item.get('dates') or (item.get('event') or {}).get('dates'):
            dates = item.get('dates') or item.get('event').get('dates')
            item['schedule'] = utc_to_local(config.DEFAULT_TIMEZONE, dates.get('start'))
            if get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) != \
                    get_timezone_offset(dates.get('tz'), utcnow()):
                item['schedule'] = "{} ({})".format(item['schedule'].strftime('%H%M'), item['schedule'].tzname())
            else:
                item['schedule'] = item['schedule'].strftime('%H%M')

    agendas = []
    if resource_type == 'planning':
        agendas = group_items_by_agenda(items)
        inject_internal_converages(items)

    labels = {}
    cv = get_resource_service('vocabularies').find_one(req=None, _id='g2_content_type')
    if cv:
        labels = {_type['qcode']: _type['name'] for _type in cv['items']}

    for item in items:
        item['coverages'] = [
            labels.get(coverage.get('planning').get('g2_content_type'),
                       coverage.get('planning').get('g2_content_type')) +
            (' (cancelled)' if coverage.get('workflow_status', '') == 'cancelled' else '')
            for coverage in item.get('coverages', [])
            if (coverage.get('planning') or {}).get('g2_content_type')
        ]

    article = {}
    for key, value in template.items():
        if value.endswith(".html"):
            article[key.replace('_template', '')] = render_template(value, items=items, agendas=agendas)
        else:
            article[key] = render_template_string(value, items=items, agendas=agendas)

    return article
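# A minimal, self-contained sketch (not Superdesk code) of the dispatch used above:
# export-template values ending in ".html" are treated as template file names for
# render_template, while any other value is treated as an inline Jinja2 string and
# rendered with render_template_string. The template dict and item below are made up.
from flask import Flask, render_template_string

sketch_app = Flask(__name__)

def render_inline_fields(template, items, agendas):
    article = {}
    with sketch_app.app_context():
        for key, value in template.items():
            if value.endswith('.html'):
                continue  # file-based templates would go through render_template(value, ...)
            article[key] = render_template_string(value, items=items, agendas=agendas)
    return article

print(render_inline_fields({'headline': '{{ items | length }} planned item(s)'},
                           items=[{'slugline': 'budget'}], agendas=[]))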
def _get_vocabulary_display_name(self, vocabulary_id):
    vocabulary = get_resource_service('vocabularies').find_one(req=None, _id=vocabulary_id)
    if vocabulary and 'display_name' in vocabulary:
        return vocabulary['display_name']
    return vocabulary_id
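# Hypothetical illustration of the fallback above: when the vocabulary exists and has a
# display_name, that name is returned; otherwise the raw vocabulary _id is echoed back.
def display_name_or_id(vocabulary, vocabulary_id):
    if vocabulary and 'display_name' in vocabulary:
        return vocabulary['display_name']
    return vocabulary_id

assert display_name_or_id({'display_name': 'Belga Keywords'}, 'belga-keywords') == 'Belga Keywords'
assert display_name_or_id(None, 'belga-keywords') == 'belga-keywords'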
def test_belga_keywords(self): self.app.data.insert( 'desks', [{ '_id': ObjectId('5d385f17fe985ec5e1a78b49'), 'name': 'Politic Desk', 'default_content_profile': 'belga_text', 'default_content_template': 'content_template_1', 'desk_language': 'fr', 'source': 'politic' }]) self.app.data.insert('stages', [{ '_id': ObjectId('5d385f31fe985ec67a0ca583'), 'name': 'Incoming Stage', 'default_incoming': True, 'desk_order': 2, 'content_expiry': None, 'working_stage': False, 'is_visible': True, 'desk': ObjectId('5d385f17fe985ec5e1a78b49') }]) self.app.data.insert('vocabularies', [{ "_id": "belga-keywords", "display_name": "Belga Keywords", "type": "manageable", "selection_type": "multi selection", "unique_field": "qcode", "schema": { "name": {}, "qcode": {}, "translations": {} }, "service": { "all": 1 }, "items": [{ "name": "BRIEF", "qcode": "BRIEF", "is_active": True, "translations": { "name": { "nl": "BRIEF", "fr": "BRIEF" } } }, { "name": "PREVIEW", "qcode": "PREVIEW", "is_active": True, "translations": { "name": { "nl": "VOORBERICHT", "fr": "AVANT-PAPIER" } } }] }]) self.app.data.insert('content_templates', [{ '_id': 'content_template_1', 'template_name': 'belga text', 'is_public': True, 'data': { 'profile': 'belga_text', 'type': 'text', 'pubstatus': 'usable', 'format': 'HTML', 'headline': '', 'language': 'en', 'keywords': ['some', 'keyword'], 'body_html': '' }, 'template_type': 'create', }]) item = { '_id': 'urn:newsml:localhost:5000:2019-12-10T14:43:46.224107:d13ac5ae-7f43-4b7f-89a5-2c6835389564', 'guid': 'urn:newsml:localhost:5000:2019-12-10T14:43:46.224107:d13ac5ae-7f43-4b7f-89a5-2c6835389564', 'headline': 'test headline', 'slugine': 'test slugline', 'state': 'published', 'type': 'text', "subject": [{ 'name': 'BRIEF', 'qcode': 'BRIEF', 'translations': { 'name': { 'nl': 'BRIEF', 'fr': 'BRIEF' } }, 'scheme': 'belga-keywords' }], 'keywords': ['foo', 'bar'], 'language': 'fr' } self.app.data.insert('archive', [item]) self.assertRaises(StopDuplication, set_default_metadata_with_translate, item, dest_desk_id=ObjectId('5d385f17fe985ec5e1a78b49'), dest_stage_id=ObjectId('5d385f31fe985ec67a0ca583')) archive_service = get_resource_service('archive') new_item = archive_service.find_one( req=None, original_id= 'urn:newsml:localhost:5000:2019-12-10T14:43:46.224107:d13ac5ae-7f43-4b7f-89a5-2c6835389564' ) self.assertEqual(item["subject"], new_item["subject"])
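# Side note, not taken from the test above: the belga-keywords fixture stores per-language
# names under items[n].translations.name, so resolving a localized label for a qcode is a
# plain dictionary lookup. The helper below is hypothetical and only mirrors that shape.
def localized_keyword_name(vocabulary, qcode, language):
    for kw in vocabulary.get('items', []):
        if kw.get('qcode') == qcode:
            return kw.get('translations', {}).get('name', {}).get(language, kw.get('name'))
    return None

belga_keywords_fixture = {'items': [
    {'name': 'PREVIEW', 'qcode': 'PREVIEW',
     'translations': {'name': {'nl': 'VOORBERICHT', 'fr': 'AVANT-PAPIER'}}},
]}
assert localized_keyword_name(belga_keywords_fixture, 'PREVIEW', 'fr') == 'AVANT-PAPIER'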
def get_desk_template(desk):
    default_content_template = desk.get('default_content_template')
    if default_content_template:
        return get_resource_service('content_templates').find_one(req=None, _id=default_content_template)
    return {}
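# Hypothetical usage sketch of get_desk_template: because a desk with no
# default_content_template yields an empty dict (not None), callers can chain dict
# lookups safely without touching the content_templates service.
template = get_desk_template({'name': 'Sports Desk'})  # no default_content_template set
assert template == {}
default_headline = template.get('data', {}).get('headline', '')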
def _get_content_filters_by_content_filter(self, content_filter_id):
    lookup = {'content_filter.expression.pf': {'$in': [content_filter_id]}}
    content_filters = get_resource_service('content_filters').get(req=None, lookup=lookup)
    return content_filters
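# Illustration only: the dotted-path lookup above matches content filters whose
# content_filter[].expression.pf array contains the given filter id. A pure-Python
# equivalent of that Mongo query, applied to one hypothetical document:
def references_content_filter(content_filter_doc, content_filter_id):
    return any(
        content_filter_id in (entry.get('expression', {}).get('pf') or [])
        for entry in content_filter_doc.get('content_filter', [])
    )

doc = {'name': 'sport only', 'content_filter': [{'expression': {'pf': [1, 7]}}]}
assert references_content_filter(doc, 7)
assert not references_content_filter(doc, 99)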
def test_duplicate(self): self.app.data.insert( 'desks', [{ '_id': ObjectId('5d385f17fe985ec5e1a78b49'), 'name': 'Politic Desk', 'default_content_profile': 'belga_text', 'default_content_template': 'content_template_1', 'desk_language': 'fr', 'source': 'politic' }]) self.app.data.insert('stages', [{ '_id': ObjectId('5d385f31fe985ec67a0ca583'), 'name': 'Incoming Stage', 'default_incoming': True, 'desk_order': 2, 'content_expiry': None, 'working_stage': False, 'is_visible': True, 'desk': ObjectId('5d385f17fe985ec5e1a78b49') }]) self.app.data.insert('content_templates', [{ '_id': 'content_template_1', 'template_name': 'belga text', 'is_public': True, 'data': { 'profile': 'belga_text', 'type': 'text', 'pubstatus': 'usable', 'format': 'HTML', 'headline': '', 'subject': [ { 'name': 'INT/GENERAL', 'qcode': 'INT/GENERAL', 'parent': 'INT', 'scheme': 'services-products' }, { 'name': 'default', 'qcode': 'default', 'scheme': 'distribution' }, ], 'language': 'en', 'keywords': ['some', 'keyword'], 'body_html': '' }, 'template_type': 'create', }]) item = { '_id': 'urn:newsml:localhost:5000:2019-12-10T14:43:46.224107:d13ac5ae-7f43-4b7f-89a5-2c6835389564', 'guid': 'urn:newsml:localhost:5000:2019-12-10T14:43:46.224107:d13ac5ae-7f43-4b7f-89a5-2c6835389564', 'headline': 'test headline', 'slugine': 'test slugline', 'state': 'published', 'type': 'text', 'keywords': ['foo', 'bar'], 'language': 'en' } self.app.data.insert('archive', [item]) self.assertRaises(StopDuplication, set_default_metadata_with_translate, item, dest_desk_id=ObjectId('5d385f17fe985ec5e1a78b49'), dest_stage_id=ObjectId('5d385f31fe985ec67a0ca583')) archive_service = get_resource_service('archive') new_item = archive_service.find_one( req=None, original_id= 'urn:newsml:localhost:5000:2019-12-10T14:43:46.224107:d13ac5ae-7f43-4b7f-89a5-2c6835389564' ) self.assertNotIn('translated_from', new_item)
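# Aside: the assertRaises(...) calls in the two tests above can also be written as
# context managers, which keeps long keyword arguments readable. A self-contained
# unittest sketch of that pattern, using a stand-in exception rather than the real
# StopDuplication:
import unittest

class StopDuplicationStub(Exception):
    """Stand-in for StopDuplication, used only in this sketch."""

def duplicate_with_stop(item):
    # a hypothetical callable that signals "duplication already handled" by raising
    raise StopDuplicationStub()

class ContextManagerAssertExample(unittest.TestCase):
    def test_raises(self):
        with self.assertRaises(StopDuplicationStub):
            duplicate_with_stop({'guid': 'x'})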
def archive_item(self, guid, provider_id, user, task_id=None):
    try:
        # For CELERY_ALWAYS_EAGER=True the current request context is
        # empty but an already initialized one is on request_stack
        if app.config['CELERY_ALWAYS_EAGER']:
            self.request_stack.pop()

        crt_task_id = self.request.id
        if not task_id:
            task_id = crt_task_id

        if not self.request.retries:
            update_status(*add_subtask_to_progress(task_id))

        provider = superdesk.get_resource_service('ingest_providers').find_one(req=None, _id=provider_id)
        if provider is None:
            message = 'For ingest with guid=%s, failed to retrieve provider with _id=%s' % (guid, provider_id)
            raise_fail(task_id, message)
        service_provider = superdesk.io.providers[provider.get('type')]
        service_provider.provider = provider

        item = None
        old_item = False
        try:
            items = service_provider.get_items(guid)
        except LookupError:
            ingest_doc = superdesk.get_resource_service('ingest').find_one(req=None, _id=guid)
            if not ingest_doc:
                message = 'Ingest item with guid %s not found for provider %s' % (guid, provider.get('type'))
                raise_fail(task_id, message)
            else:
                old_item = True
                ingest_doc.pop('_id')
                items = [ingest_doc]
        except Exception:
            raise self.retry(countdown=2)

        for item_it in items:
            if 'guid' in item_it and item_it['guid'] == guid:
                item = item_it
                break

        if item is None:
            message = 'Provider returned items, but none matched guid %s (provider %s)' \
                % (guid, provider.get('type'))
            raise_fail(task_id, message)

        if not old_item:
            item['created'] = item['firstcreated'] = utc.localize(item['firstcreated'])
            item['updated'] = item['versioncreated'] = utc.localize(item['versioncreated'])

        # Necessary because flask.g.user is None while fetching packages for grouped
        # items or while patching the archive collection. Without this, version_creator
        # is set to None, which doesn't make sense.
        flask.g.user = user
        remove_unwanted(item)
        superdesk.get_resource_service(ARCHIVE).patch(guid, item)

        tasks = []
        for group in item.get('groups', []):
            for ref in group.get('refs', []):
                if 'residRef' in ref:
                    resid_ref = ref.get('residRef')
                    doc = {'guid': resid_ref, 'ingest_provider': provider_id, 'task_id': crt_task_id}

                    archived_doc = superdesk.get_resource_service(ARCHIVE).find_one(req=None, guid=doc.get('guid'))
                    # check if task already started
                    if not archived_doc:
                        doc.setdefault('_id', doc.get('guid'))
                        superdesk.get_resource_service(ARCHIVE).post([doc])
                    elif archived_doc.get('task_id') == crt_task_id:
                        # it is a retry so continue
                        archived_doc.update(doc)
                        remove_unwanted(archived_doc)
                        superdesk.get_resource_service(ARCHIVE).patch(archived_doc.get('_id'), archived_doc)
                    else:
                        # there is a cyclic dependency, skip it
                        continue

                    mark_ingest_as_archived(doc.get('guid'))
                    tasks.append(archive_item.s(resid_ref, provider.get('_id'), user, task_id))

        for rendition in item.get('renditions', {}).values():
            href = service_provider.prepare_href(rendition['href'])
            if rendition['rendition'] == 'baseImage':
                tasks.append(archive_media.s(task_id, guid, href))
            else:
                tasks.append(archive_rendition.s(task_id, guid, rendition['rendition'], href))

        update_status(*finish_subtask_from_progress(task_id))
        if tasks:
            chord((task for task in tasks), update_item.s(crt_task_id == task_id, task_id, guid)).delay()
        else:
            insert_into_versions(guid, task_id)
            if task_id == crt_task_id:
                update_status(*finish_task_for_progress(task_id))
    except Exception:
        logger.error(traceback.format_exc())
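# A minimal, self-contained sketch (hypothetical task names, not the archive tasks above)
# of the fan-out/join pattern used in archive_item: child signatures are collected and
# chained into a celery chord whose callback runs once every child has finished; when
# there are no children the follow-up work runs directly. Not meant to be executed
# against a real broker.
from celery import Celery, chord

celery_sketch = Celery('sketch', broker='memory://', backend='cache+memory://')

@celery_sketch.task
def archive_piece(ref):
    return ref

@celery_sketch.task
def finalize(results, guid):
    # the chord body receives the list of child results as its first argument
    return {'guid': guid, 'archived': results}

def archive_with_children(guid, refs):
    tasks = [archive_piece.s(ref) for ref in refs]
    if tasks:
        chord(tasks)(finalize.s(guid))
    else:
        finalize.apply_async(([], guid))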
def _process_bunch(self, x):
    # x.findall('dc_rest_docs/dc_rest_doc')[0].get('href')
    items = []
    for doc in x.findall('dc_rest_docs/dc_rest_doc'):
        try:
            # print(doc.get('href'))
            id = doc.find('dcdossier').get('id')
            if self._direction:
                if int(id) > self._id:
                    self._id = int(id)
            else:
                if int(id) < self._id:
                    self._id = int(id)
            item = {}
            item['guid'] = doc.find('dcdossier').get('guid')
            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
            format = self._get_head_value(doc, 'Format')
            if format == 't':
                item[FORMAT] = FORMATS.PRESERVED
            else:
                item[FORMAT] = FORMATS.HTML

            # if the item has been modified in the archive then it is due to a kill;
            # there is an argument that this item should not be imported at all
            if doc.find('dcdossier').get('created') != doc.find('dcdossier').get('modified'):
                # item[ITEM_STATE] = CONTENT_STATE.KILLED
                continue
            else:
                item[ITEM_STATE] = CONTENT_STATE.PUBLISHED

            value = datetime.strptime(self._get_head_value(doc, 'PublicationDate'), '%Y%m%d%H%M%S')
            local_tz = pytz.timezone('Australia/Sydney')
            try:
                aus_dt = local_tz.localize(value, is_dst=None)
            except NonExistentTimeError:
                aus_dt = local_tz.localize(value, is_dst=True)
            except AmbiguousTimeError:
                aus_dt = local_tz.localize(value, is_dst=False)
            item['firstcreated'] = aus_dt.astimezone(pytz.utc)
            item['versioncreated'] = item['firstcreated']
            generate_unique_id_and_name(item)
            item['ingest_id'] = id

            last_line = None
            el = doc.find('dcdossier/document/body/BodyText')
            if el is not None:
                story = el.text
                lines = story.split('\n')
                if len(lines) > 0:
                    last_line = lines[-1]
                if item.get(FORMAT) == FORMATS.HTML:
                    story = story.replace('\n ', '<p></p>')
                    story = story.replace('\n', '<br>')
                    item['body_html'] = '<p>' + story + '</p>'
                else:
                    item['body_html'] = '<pre>' + story + '</pre>'
                try:
                    item['word_count'] = get_text_word_count(item['body_html'])
                except Exception:
                    pass
            else:
                # Items with no body are ignored
                continue

            item['source'] = self._get_head_value(doc, 'Agency')
            # if the source document contains no agency then by definition it is unknown
            if item['source'] is None:
                item['source'] = 'UNKNOWN'
            else:
                # check if the source of the document was Newscentre
                dc_unique = doc.find('dcdossier').get('unique')
                if dc_unique.startswith('NC.') and last_line is not None:
                    # The AFR summary articles all have agency values 25 chars long
                    if len(item['source']) == 25:
                        item['source'] = 'AAP'
                    # is it a numeric Agency
                    elif self._get_head_value(doc, 'Agency').isdigit():
                        sign_off = last_line.split(' ')
                        if len(sign_off) > 0:
                            item['source'] = sign_off[0].upper()
                        else:
                            item['source'] = last_line.upper()
                        # clean up what we have extracted
                        if item['source'].startswith('AAP'):
                            item['source'] = 'AAP'
                        else:
                            # make sure it is one of the known values
                            if item['source'] not in {'AAP', 'AP', 'REUT', 'Asia Pulse', 'DPA', 'AFP', 'RAW',
                                                      'NZA', 'NZPA', 'KRT', 'PA', 'PAA', 'SNI', 'REUTERS'}:
                                print('Source : {}'.format(item['source']))
                                item['source'] = 'UNKNOWN'

            # self._addkeywords('AsiaPulseCodes', doc, item)

            byline = self._get_head_value(doc, 'Byline')
            if byline:
                item['byline'] = byline

            # item['service'] = self._get_head_value(doc,'Service')

            category = self._get_head_value(doc, 'Category')
            if not category:
                publication_name = self._get_head_value(doc, 'PublicationName')
                if publication_name in pubnames:
                    category = pubnames[publication_name]
            if category:
                anpacategory = {}
                anpacategory['qcode'] = category
                for anpa_category in self._anpa_categories['items']:
                    if anpacategory['qcode'].lower() == anpa_category['qcode'].lower():
                        anpacategory = {'qcode': anpacategory['qcode'], 'name': anpa_category['name']}
                        break
                item['anpa_category'] = [anpacategory]

            self._addkeywords('CompanyCodes', doc, item)

            item['keyword'] = self._get_head_value(doc, 'Keyword')
            item['ingest_provider_sequence'] = self._get_head_value(doc, 'Sequence')

            original_source = self._get_head_value(doc, 'Author')
            if original_source:
                item['original_source'] = original_source

            item['headline'] = self._get_head_value(doc, 'Headline')

            code = self._get_head_value(doc, 'SubjectRefNum')
            if code and len(code) == 7:
                code = '0' + code
            if code and code in subject_codes:
                item['subject'] = []
                item['subject'].append({'qcode': code, 'name': subject_codes[code]})
                try:
                    process_iptc_codes(item, None)
                except Exception:
                    pass

            slug = self._get_head_value(doc, 'SLUG')
            if slug:
                item['slugline'] = slug
            else:
                item['slugline'] = self._get_head_value(doc, 'Keyword')

            take_key = self._get_head_value(doc, 'Takekey')
            if take_key:
                item['anpa_take_key'] = take_key

            self._addkeywords('Topic', doc, item)
            # self._addkeywords('Selectors', doc, item)

            item['pubstatus'] = 'usable'
            # this is required for the archived service additional lookup
            item['item_id'] = item['guid']
            item[config.VERSION] = 1
            item['flags'] = {'marked_archived_only': True}

            # item['_id'] = ObjectId(id.rjust(24,'0'))
            item['_id'] = ObjectId()
            items.append(item)

            if self._limit:
                self._limit -= 1
            # print(item)
        except Exception:
            print('Exception parsing DC document {}'.format(id))
            pass

    try:
        res = superdesk.get_resource_service('archived')
        s = time.time()
        res.post(items)
        print('Post to Batch to Superdesk took {:.2f}'.format(time.time() - s))
    except Exception as ex:
        if getattr(ex, 'code', None) == 409:
            print('Key clash exception detected')
            # create a list of the guids we tried to post
            guids = [g['guid'] for g in items]
            # create a query for all those id's
            query = {
                'size': self.BATCH_SIZE,
                'query': {
                    'filtered': {
                        'filter': {
                            'terms': {
                                'guid': guids
                            }
                        }
                    }
                }
            }
            req = ParsedRequest()
            repos = 'archived'
            req.args = {'source': json.dumps(query), 'repo': repos}
            search_res = superdesk.get_resource_service('search')
            existing = search_res.get(req=req, lookup=None)
            existing_guids = [e['guid'] for e in existing]
            not_existing = [g for g in guids if g not in existing_guids]
            for missing_guid in not_existing:
                i = [m for m in items if m['guid'] == missing_guid]
                original = res.find_one(req=None, guid=i[0]['guid'])
                if not original:
                    try:
                        s = time.time()
                        res.post(i)
                        print('Post single item to Superdesk in {:.2f} seconds'.format(time.time() - s))
                    except Exception:
                        print('Exception posting single item')
        else:
            print('Exception posting batch')
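# Stand-alone illustration of the DST-edge handling used in _process_bunch above:
# pytz localize(..., is_dst=None) raises for wall-clock times that do not exist
# (spring forward) or are ambiguous (fall back), and the parser falls back to an
# explicit is_dst choice in those two cases.
import pytz
from datetime import datetime
from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError

def localize_sydney(naive_dt):
    sydney = pytz.timezone('Australia/Sydney')
    try:
        return sydney.localize(naive_dt, is_dst=None)
    except NonExistentTimeError:
        return sydney.localize(naive_dt, is_dst=True)
    except AmbiguousTimeError:
        return sydney.localize(naive_dt, is_dst=False)

# 2021-10-03 02:30 did not exist in Sydney (clocks jumped from 02:00 to 03:00)
print(localize_sydney(datetime(2021, 10, 3, 2, 30)).isoformat())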
def prepopulate_data(file_name, default_user=None, directory=None):
    if default_user is None:
        default_user = get_default_user()
    if not directory:
        directory = os.path.abspath(os.path.dirname(__file__))
    placeholders = {'NOW()': date_to_str(utcnow())}
    users = {default_user['username']: default_user['password']}
    default_username = default_user['username']
    file = os.path.join(directory, file_name)
    with open(file, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)
        for item in json_data:
            resource = item.get('resource', None)
            try:
                service = get_resource_service(resource)
            except KeyError:
                continue  # resource which is not configured - ignore
            username = item.get('username', None) or default_username
            set_logged_user(username, users[username])
            id_name = item.get('id_name', None)
            id_update = item.get('id_update', None)
            text = json.dumps(item.get('data', None))
            text = apply_placeholders(placeholders, text)
            data = json.loads(text)
            if resource:
                app.data.mongo._mongotize(data, resource)
            if resource == 'users':
                users.update({data['username']: data['password']})
            if id_update:
                id_update = apply_placeholders(placeholders, id_update)
                res = service.patch(ObjectId(id_update), data)
                if not res:
                    raise Exception()
            else:
                try:
                    ids = service.post([data])
                except werkzeug.exceptions.Conflict:
                    # instance was already prepopulated
                    break
                except superdesk.errors.SuperdeskApiError as e:
                    logger.exception(e)
                    continue  # an error raised by validation
                if not ids:
                    raise Exception()
                if id_name:
                    placeholders[id_name] = str(ids[0])

            if app.config['VERSION'] in data:
                number_of_versions_to_insert = data[app.config['VERSION']]
                doc_versions = []

                if data[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]:
                    while number_of_versions_to_insert != 0:
                        doc_versions.append(data.copy())
                        number_of_versions_to_insert -= 1
                else:
                    if data[ITEM_STATE] in [CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED, CONTENT_STATE.CORRECTED]:
                        latest_version = data.copy()
                        doc_versions.append(latest_version)

                        published_version = data.copy()
                        published_version[ITEM_STATE] = CONTENT_STATE.PUBLISHED
                        published_version[ITEM_OPERATION] = 'publish'
                        published_version[app.config['VERSION']] = number_of_versions_to_insert - 1
                        doc_versions.append(published_version)

                        number_of_versions_to_insert -= 2
                    elif data[ITEM_STATE] == CONTENT_STATE.PUBLISHED:
                        published_version = data.copy()
                        doc_versions.append(published_version)
                        number_of_versions_to_insert -= 1

                    while number_of_versions_to_insert != 0:
                        doc = data.copy()
                        doc[ITEM_STATE] = CONTENT_STATE.PROGRESS
                        doc.pop(ITEM_OPERATION, '')
                        doc[app.config['VERSION']] = number_of_versions_to_insert
                        doc_versions.append(doc)
                        number_of_versions_to_insert -= 1

                insert_versioning_documents(resource, doc_versions if doc_versions else data)
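# The fixture loader above serialises each record to JSON, substitutes placeholders such
# as 'NOW()' and previously captured ids, then parses it back. apply_placeholders is a
# Superdesk helper; the version below is a hypothetical minimal equivalent, with made-up
# placeholder keys, shown only to illustrate the technique.
import json

def apply_placeholders_sketch(placeholders, text):
    for key, value in placeholders.items():
        text = text.replace(key, value)
    return text

sketch_placeholders = {'NOW()': '2024-01-01T00:00:00+0000', '#desk_id#': '5d385f17fe985ec5e1a78b49'}
raw = json.dumps({'versioncreated': 'NOW()', 'desk': '#desk_id#'})
print(json.loads(apply_placeholders_sketch(sketch_placeholders, raw)))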
def find_and_replace(item, **kwargs):
    """
    Find and replace words

    :param dict item:
    :param kwargs:
    :return tuple(dict, dict): tuple of modified item and diff of items modified.
    """
    diff = {}

    def repl(new, old):
        """
        Returns a version of the "new" string that matches the case of the "old" string

        :param new:
        :param old:
        :return: a string which is a version of "new" that matches the case of old.
        """
        if old.islower():
            return new.lower()
        elif old.isupper():
            return new.upper()
        else:
            # the old string starts with upper case so we use the title function
            if old[:1].isupper():
                return new.title()
            # it is more complex so try to match it
            else:
                result = ''
                all_upper = True
                for i, c in enumerate(old):
                    if i >= len(new):
                        break
                    if c.isupper():
                        result += new[i].upper()
                    else:
                        result += new[i].lower()
                        all_upper = False
                # append any remaining characters from new
                if all_upper:
                    result += new[i + 1:].upper()
                else:
                    result += new[i + 1:].lower()
                return result

    def do_find_replace(input_string, words_list):
        found_list = {}
        for word in words_list:
            pattern = r'{}'.format(re.escape(word.get('existing', '')))
            while re.search(pattern, input_string, flags=re.IGNORECASE):
                # get the original string from the input
                original = re.search(pattern, input_string, flags=re.IGNORECASE).group(0)
                replacement = repl(word.get('replacement', ''), original)
                if found_list.get(original):
                    break
                diff[original] = replacement
                found_list[original] = replacement
                input_string = input_string.replace(original, replacement)
        return input_string

    vocab = get_resource_service('vocabularies').find_one(req=None, _id='replace_words')
    if vocab:
        replace_words_list = vocab.get('items') or []
        if not replace_words_list:
            return (item, diff)
        for field in macro_replacement_fields:
            if not item.get(field, None):
                continue
            item[field] = do_find_replace(item[field], replace_words_list)

    return (item, diff)
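# Quick illustration of the case-matching rule implemented by repl() above: the
# replacement word inherits the casing of the matched text (lower, upper, or title
# case). This standalone helper mirrors only the three simple branches, for demonstration.
def match_case(new, old):
    if old.islower():
        return new.lower()
    if old.isupper():
        return new.upper()
    if old[:1].isupper():
        return new.title()
    return new

assert match_case('color', 'colour') == 'color'
assert match_case('color', 'COLOUR') == 'COLOR'
assert match_case('color', 'Colour') == 'Color'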