def _get_items(self):
    """Get items from the Legal Archive that belong to the COMMISSION desk.

    :return: list of legal archive content
    """
    logger.info('Fetching legal archive content for the {} desk'.format(self.default_desk))

    query = {'task.desk': self.default_desk, 'type': 'text'}
    req = ParsedRequest()
    req.where = json.dumps(query)

    legal_archive_service = get_resource_service(LEGAL_ARCHIVE_NAME)
    legal_items = list(legal_archive_service.get_from_mongo(req=req, lookup=None))

    if legal_items:
        logger.info(
            'Found {} items in the legal archive for the {} desk'.format(
                len(legal_items), self.default_desk)
        )
    else:
        logger.warning('Failed to find any {} desk items in the legal archive'.format(self.default_desk))
        legal_items = []

    return legal_items
def _get_field_values(self): values = {} vocabularies_resource = get_resource_service('vocabularies') values['anpa_category'] = vocabularies_resource.find_one(req=None, _id='categories')['items'] req = ParsedRequest() req.where = json.dumps({'$or': [{"schema_field": "genre"}, {"_id": "genre"}]}) genre = vocabularies_resource.get(req=req, lookup=None) if genre.count(): values['genre'] = genre[0]['items'] values['urgency'] = vocabularies_resource.find_one(req=None, _id='urgency')['items'] values['priority'] = vocabularies_resource.find_one(req=None, _id='priority')['items'] values['type'] = vocabularies_resource.find_one(req=None, _id='type')['items'] subject = vocabularies_resource.find_one(req=None, schema_field='subject') if subject: values['subject'] = subject['items'] else: values['subject'] = get_subjectcodeitems() values['desk'] = list(get_resource_service('desks').get(None, {})) values['stage'] = self._get_stage_field_values(values['desk']) values['sms'] = [{'qcode': 0, 'name': 'False'}, {'qcode': 1, 'name': 'True'}] values['embargo'] = [{'qcode': 0, 'name': 'False'}, {'qcode': 1, 'name': 'True'}] req = ParsedRequest() req.where = json.dumps({'$or': [{"schema_field": "place"}, {"_id": "place"}, {"_id": "locators"}]}) place = vocabularies_resource.get(req=req, lookup=None) if place.count(): values['place'] = place[0]['items'] values['ingest_provider'] = list(get_resource_service('ingest_providers').get(None, {})) return values
def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get("item_id") for item in items if item.get("item_id")]))
        archive_items = []
        archive_lookup = {}
        if ids:
            query = {"$and": [{config.ID_FIELD: {"$in": ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # published items can't be fetched from elastic due to the filter on the archive resource,
            # hence going to mongo
            archive_items = list(
                superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=archive_req, lookup=query)
            )

            takes_service = TakesPackageService()
            takes_service.enhance_items_with_takes_packages(archive_items)
            for item in archive_items:
                handle_existing_data(item)
                archive_lookup[item[config.ID_FIELD]] = item

        for item in items:
            archive_item = archive_lookup.get(item.get("item_id"), {config.VERSION: item.get(config.VERSION, 1)})

            updates = {
                config.ID_FIELD: item.get("item_id"),
                "item_id": item.get(config.ID_FIELD),
                "lock_user": archive_item.get("lock_user", None),
                "lock_time": archive_item.get("lock_time", None),
                "lock_session": archive_item.get("lock_session", None),
                "archive_item": archive_item if archive_item else None,
            }

            item.update(updates)
            handle_existing_data(item)
def get_published_items_by_moved_to_legal(self, item_ids, move_to_legal):
    """Get the published items where the moved_to_legal flag matches.

    :param list item_ids: list of item ids
    :param bool move_to_legal: move_to_legal boolean flag
    :return: list of published items
    """
    if item_ids:
        try:
            query = {
                'query': {
                    'filtered': {
                        'filter': {
                            'and': [
                                {'terms': {'item_id': item_ids}},
                                {'term': {'moved_to_legal': move_to_legal}}
                            ]
                        }
                    }
                }
            }
            request = ParsedRequest()
            request.args = {'source': json.dumps(query)}
            return list(super().get(req=request, lookup=None))
        except Exception:
            logger.exception('Failed to get published items '
                             'by moved to legal: {} -- ids: {}.'.format(move_to_legal, item_ids))

    return []
def get_elastic_item(self, id): resource = superdesk.get_resource_service(self.resource_name) query = {'query': {'filtered': {'filter': {'term': {'_id': id}}}}} request = ParsedRequest() request.args = {'source': json.dumps(query)} items = resource.get(req=request, lookup=None) return items[0]
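# A hedged variant of get_elastic_item above (an assumption, not part of the
# source): items[0] raises IndexError when the id is missing from elastic,
# so this sketch returns None instead, using the same superdesk service API.
def get_elastic_item_or_none(self, id):
    resource = superdesk.get_resource_service(self.resource_name)
    query = {'query': {'filtered': {'filter': {'term': {'_id': id}}}}}
    request = ParsedRequest()
    request.args = {'source': json.dumps(query)}
    items = list(resource.get(req=request, lookup=None))
    return items[0] if items else None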
def test_using_repo_request_attribute(self):
    with self.app.app_context():
        req = ParsedRequest()
        req.args = {"repo": "ingest"}
        docs = self.app.data.find("search", req, None)
        self.assertEqual(1, docs.count())
        self.assertEqual("ingest", docs[0]["_type"])
def get_mongo_items(self, consistency_record): # get the records from mongo in chunks projection = dict(superdesk.resources[self.resource_name].endpoint_schema['datasource']['projection']) superdesk.resources[self.resource_name].endpoint_schema['datasource']['projection'] = None service = superdesk.get_resource_service(self.resource_name) cursor = service.get_from_mongo(None, {}) count = cursor.count() no_of_buckets = len(range(0, count, self.default_page_size)) mongo_items = [] updated_mongo_items = [] request = ParsedRequest() request.projection = json.dumps({'_etag': 1, '_updated': 1}) for x in range(0, no_of_buckets): skip = x * self.default_page_size print('Page : {}, skip: {}'.format(x + 1, skip)) # don't get any new records since the elastic items are retrieved cursor = service.get_from_mongo(request, {'_created': {'$lte': consistency_record['started_at']}}) cursor.skip(skip) cursor.limit(self.default_page_size) cursor = list(cursor) mongo_items.extend([(mongo_item['_id'], mongo_item['_etag']) for mongo_item in cursor]) updated_mongo_items.extend([mongo_item['_id'] for mongo_item in cursor if mongo_item['_updated'] > consistency_record['started_at']]) superdesk.resources[self.resource_name].endpoint_schema['datasource']['projection'] = projection return mongo_items, updated_mongo_items
def get_publish_items(item_id, last_version):
    # note: this helper references `self`, so it is presumably defined inside a
    # test method and closes over the enclosing TestCase instance
    query = {'query': {'filtered': {'filter': {'and': [
        {'term': {'item_id': item_id}},
        {'term': {LAST_PUBLISHED_VERSION: last_version}}
    ]}}}}
    request = ParsedRequest()
    request.args = {'source': json.dumps(query), 'aggregations': 0}
    return self.app.data.find(PUBLISHED, req=request, lookup=None)
def _get_changed_items(self, existing_items, package):
    """Returns the added and removed items from existing_items.

    :param existing_items: existing list of item refs
    :param package: the published package being compared
    :return: list of removed items and list of added items
    """
    published_service = get_resource_service('published')
    req = ParsedRequest()
    query = {'query': {'filtered': {'filter': {'and': [{'terms': {QUEUE_STATE: [
        PUBLISH_STATE.QUEUED, PUBLISH_STATE.QUEUED_NOT_TRANSMITTED]}},
        {'term': {'item_id': package['item_id']}}]}}},
        'sort': [{'publish_sequence_no': 'desc'}]}
    req.args = {'source': json.dumps(query)}
    req.max_results = 1
    previously_published_packages = published_service.get(req=req, lookup=None)

    if not previously_published_packages.count():
        return [], []

    previously_published_package = previously_published_packages[0]

    if 'groups' in previously_published_package:
        old_items = self.package_service.get_residrefs(previously_published_package)
        added_items = list(set(existing_items) - set(old_items))
        removed_items = list(set(old_items) - set(existing_items))
        return removed_items, added_items
    else:
        return [], []
def filter_subscribers(self, doc, subscribers, target_media_type):
    """
    Filter subscribers to whom the current story is going to be delivered.
    """
    filtered_subscribers = []
    req = ParsedRequest()
    req.args = {'is_global': True}
    service = get_resource_service('publish_filters')
    global_filters = list(service.get(req=req, lookup=None))

    for subscriber in subscribers:
        if target_media_type:
            can_send_takes_packages = subscriber['subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
            if (target_media_type == WIRE and can_send_takes_packages) or \
                    (target_media_type == DIGITAL and not can_send_takes_packages):
                continue

        if doc.get('targeted_for'):
            matching_target = [t for t in doc.get('targeted_for')
                               if t['name'] == subscriber.get('subscriber_type', '') or
                               t['name'] == subscriber.get('geo_restrictions', '')]

            if len(matching_target) > 0 and matching_target[0]['allow'] is False:
                continue

        if not self.conforms_global_filter(subscriber, global_filters, doc):
            continue
        if not self.conforms_publish_filter(subscriber, doc):
            continue

        filtered_subscribers.append(subscriber)

    return filtered_subscribers
def _get_broadcast_items(self, ids, include_archived_repo=False):
    """
    Get the broadcast items for the master_id and takes_package_id
    :param list ids: list of item ids
    :param include_archived_repo: True if the archived repo needs to be included in the search, default is False
    :return list: list of broadcast items
    """
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': {'term': {'genre.name': BROADCAST_GENRE}},
                        'should': [
                            {'terms': {'broadcast.master_id': ids}},
                            {'terms': {'broadcast.takes_package_id': ids}}
                        ]
                    }
                }
            }
        }
    }

    req = ParsedRequest()
    repos = 'archive,published'
    if include_archived_repo:
        repos = 'archive,published,archived'

    req.args = {'source': json.dumps(query), 'repo': repos}
    return get_resource_service('search').get(req=req, lookup=None)
def test_should_highlight(self): with self.app.app_context(): req = ParsedRequest() req.args = {'es_highlight': 1} self.assertTrue(self.app.data.should_highlight(req)) req.args = {'es_highlight': '0'} self.assertFalse(self.app.data.should_highlight(req))
def get_expired_items(self, expiry_datetime, invalid_only=False): """Get the expired items. Where content state is not scheduled and the item matches given parameters :param datetime expiry_datetime: expiry datetime :param bool invalid_only: True only invalid items :return pymongo.cursor: expired non published items. """ query = { '$and': [ {'expiry': {'$lte': date_to_str(expiry_datetime)}}, {'$or': [ {'task.desk': {'$ne': None}}, {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None} ]} ] } if invalid_only: query['$and'].append({'expiry_status': 'invalid'}) else: query['$and'].append({'expiry_status': {'$ne': 'invalid'}}) req = ParsedRequest() req.max_results = config.MAX_EXPIRY_QUERY_LIMIT req.sort = 'expiry,_created' return self.get_from_mongo(req=req, lookup=query)
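# Hedged usage sketch for get_expired_items above; the resource name and the
# one-day cutoff are illustrative, and `service` must be an instance of the
# class that defines the method.
from datetime import timedelta

from superdesk.utc import utcnow

service = superdesk.get_resource_service('archive')  # hypothetical resource
for item in service.get_expired_items(utcnow() - timedelta(days=1), invalid_only=True):
    logger.info('expired item {}'.format(item['_id']))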
def get_expired_items(expired_date_time, limit=100):
    """Fetches the expired articles from the published collection.

    Expiry Conditions:
    1. can_be_removed flag is True
    2. Item expiry is less than or equal to expired_date_time, state of the
       item is not SCHEDULED and the allow_post_publish_actions flag is True

    :param expired_date_time: datetime items must have expired before
    :param limit: maximum number of articles to fetch
    :return: expired articles from the published collection
    """
    logger.info('Get expired content from published')
    query = {
        '$or': [
            {'can_be_removed': True},
            {'$and': [
                {'expiry': {'$lte': expired_date_time}},
                {ITEM_STATE: {'$ne': CONTENT_STATE.SCHEDULED}},
                {'allow_post_publish_actions': True}
            ]}
        ]
    }

    req = ParsedRequest()
    req.sort = '_created'
    req.max_results = limit
    return superdesk.get_resource_service('published').get_from_mongo(req=req, lookup=query)
def can_format(self, format_type, article):
    if format_type != 'AAP SMS' or article[ITEM_TYPE] != CONTENT_TYPE.TEXT \
            or article.get(ITEM_STATE, '') == CONTENT_STATE.KILLED \
            or not article.get('flags', {}).get('marked_for_sms', False):
        return False

    # need to check that a story with the same sms_message has not been published to SMS before
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'and': [
                        {'term': {'state': CONTENT_STATE.PUBLISHED}},
                        {'term': {'sms_message': article.get('sms_message', article.get('abstract', ''))}},
                        {'term': {'flags.marked_for_sms': True}},
                        {'not': {'term': {'queue_state': 'in_progress'}}}
                    ]
                }
            }
        }
    }
    req = ParsedRequest()
    req.args = {'source': json.dumps(query)}
    published = superdesk.get_resource_service('published').get(req=req, lookup=None)
    if published and published.count():
        return False
    return article.get('flags', {}).get('marked_for_sms', False)
def test_args_filter(self): with self.app.app_context(): self.app.data.insert('items', [{'uri': 'foo'}, {'uri': 'bar'}]) req = ParsedRequest() req.args = {} req.args['filter'] = json.dumps({'term': {'uri': 'foo'}}) self.assertEqual(1, self.app.data.find('items', req, None).count())
def enhance_with_archive_items(self, items):
    if items:
        ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
        archive_items = []
        if ids:
            query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # published items can't be fetched from elastic due to the filter on the archive resource,
            # hence going to mongo
            archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                 .get_from_mongo(req=archive_req, lookup=query))

            takes_service = TakesPackageService()
            for item in archive_items:
                handle_existing_data(item)
                takes_service.enhance_with_package_info(item)

        for item in items:
            archive_item = [i for i in archive_items if i.get(config.ID_FIELD) == item.get('item_id')]
            archive_item = archive_item[0] if len(archive_item) > 0 else \
                {config.VERSION: item.get(config.VERSION, 1)}

            updates = {
                config.ID_FIELD: item.get('item_id'),
                'item_id': item.get(config.ID_FIELD),
                'lock_user': archive_item.get('lock_user', None),
                'lock_time': archive_item.get('lock_time', None),
                'lock_session': archive_item.get('lock_session', None),
                'archive_item': archive_item if archive_item else None
            }

            item.update(updates)
            handle_existing_data(item)
def test_resource_filter(self): with self.app.app_context(): self.app.data.insert('items_with_description', [{'uri': 'foo', 'description': 'test'}, {'uri': 'bar'}]) req = ParsedRequest() req.args = {} req.args['source'] = json.dumps({'query': {'filtered': {'filter': {'term': {'uri': 'bar'}}}}}) self.assertEqual(0, self.app.data.find('items_with_description', req, None).count())
def test_sub_resource_lookup_with_schema_filter(self): with self.app.app_context(): self.app.data.insert('items_with_description', [{'uri': 'foo', 'description': 'test', 'name': 'foo'}]) req = ParsedRequest() req.args = {} self.assertEqual(1, self.app.data.find('items_with_description', req, {'name': 'foo'}).count()) self.assertEqual(0, self.app.data.find('items_with_description', req, {'name': 'bar'}).count())
def test_remove_with_query(self): with self.app.app_context(): self.app.data.insert('items', [{'uri': 'foo'}, {'uri': 'bar'}]) self.app.data.remove('items', {'query': {'term': {'uri': 'bar'}}}) req = ParsedRequest() req.args = {} self.assertEqual(1, self.app.data.find('items', req, None).count())
def update_media_references(self, updates, original, published=False):
    """Update the media references collection.

    When an item (media item or associated media) is updated or created,
    media_references are created. These media_references are updated to the
    published state once the item is published.

    :param dict updates: Updates of the item
    :param dict original: Original item
    :param boolean published: True if publishing the item else False
    """
    item_id = original.get(config.ID_FIELD)
    references = {}
    if updates.get('renditions', original.get('renditions', {})):
        references = {item_id: updates.get('renditions', original.get('renditions', {}))}

    if original.get(ITEM_TYPE) not in MEDIA_TYPES:
        associations = updates.get(ASSOCIATIONS) or original.get(ASSOCIATIONS)
        if not associations:
            return

        references = {assoc.get(config.ID_FIELD): assoc.get('renditions')
                      for assoc in associations.values()
                      if assoc and assoc.get('renditions')}

    if not references:
        return

    for assoc_id, renditions in references.items():
        associated_id = assoc_id if assoc_id != item_id else None
        for rendition in renditions.values():
            if not rendition.get('media'):
                continue

            media = str(rendition.get('media'))
            reference = get_resource_service('media_references').find_one(req=None,
                                                                          item_id=item_id,
                                                                          media_id=media)
            if not reference:
                try:
                    get_resource_service('media_references').post([{'item_id': item_id,
                                                                    'media_id': media,
                                                                    'associated_id': associated_id,
                                                                    'published': False}])
                except Exception:
                    logger.exception('Failed to insert media reference item {} media {}'.format(item_id, media))

    # references are only flagged as published when the item itself is being published
    if not published:
        return

    req = ParsedRequest()
    req.where = json.dumps({'item_id': item_id, 'published': False})
    refs = list(get_resource_service('media_references').get(req=req, lookup=None))
    for ref in refs:
        try:
            get_resource_service('media_references').patch(ref.get(config.ID_FIELD),
                                                           updates={'published': True})
        except Exception:
            logger.exception('Failed to update media '
                             'reference item {} media {}'.format(ref.get("item_id"), ref.get("media_id")))
def test_remove_by_id(self): with self.app.app_context(): self.ids = self.app.data.insert('items', [{'uri': 'foo'}, {'uri': 'bar'}]) self.app.data.remove('items', {'_id': self.ids[0]}) req = ParsedRequest() req.args = {} self.assertEqual(1, self.app.data.find('items', req, None).count())
def on_create(self, docs):
    """
    Overriding this to set desk_order and expiry settings. Also, if this stage
    is defined as the working stage, the incoming stage, or both, the old
    working and incoming stages are removed.
    """
    for doc in docs:
        desk = doc.get('desk')

        if not desk:
            doc['desk_order'] = 1
            continue

        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        prev_stage = self.get(req=req, lookup={'desk': doc['desk']})

        if doc.get('content_expiry') == 0:
            doc['content_expiry'] = None

        if prev_stage.count() == 0:
            doc['desk_order'] = 1
        else:
            doc['desk_order'] = prev_stage[0].get('desk_order', 1) + 1

        # if this new one is default then remove the old default
        if doc.get('working_stage', False):
            self.remove_old_default(desk, 'working_stage')
        if doc.get('default_incoming', False):
            self.remove_old_default(desk, 'default_incoming')
def test_versions_across_collections_after_publish(self): self.app.data.insert('archive_versions', self.article_versions) # Publishing an Article doc = self.articles[3] original = doc.copy() published_version_number = original[config.VERSION] + 1 get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD], updates={ITEM_STATE: CONTENT_STATE.PUBLISHED, config.VERSION: published_version_number}) article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD]) self.assertIsNotNone(article_in_production) self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.PUBLISHED) self.assertEqual(article_in_production[config.VERSION], published_version_number) enqueue_published() lookup = {'item_id': original[config.ID_FIELD], 'item_version': published_version_number} queue_items = list(get_resource_service(PUBLISH_QUEUE).get(req=None, lookup=lookup)) assert len(queue_items) > 0, \ "Transmission Details are empty for published item %s" % original[config.ID_FIELD] lookup = {'item_id': original[config.ID_FIELD], config.VERSION: published_version_number} request = ParsedRequest() request.args = {'aggregations': 0} items_in_published_collection = list(get_resource_service(PUBLISHED).get(req=request, lookup=lookup)) assert len(items_in_published_collection) > 0, \ "Item not found in published collection %s" % original[config.ID_FIELD]
def _find_articles_to_kill(self, lookup):
    """
    Finds the article to kill. If the article is associated with a Digital Story
    then the Digital Story will also be fetched. If the Digital Story has more
    takes, then all of them will be fetched as well.

    :param lookup: query to find the main article to be killed
    :type lookup: dict
    :return: list of articles to be killed
    :rtype: list
    """
    archived_doc = self.find_one(req=None, **lookup)

    req = ParsedRequest()
    req.sort = '[("%s", -1)]' % config.VERSION
    archived_doc = list(self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
    articles_to_kill = [archived_doc]
    takes_package_service = TakesPackageService()
    takes_package_id = takes_package_service.get_take_package_id(archived_doc)
    if takes_package_id:
        takes_package = list(self.get(req=req, lookup={'item_id': takes_package_id}))[0]
        articles_to_kill.append(takes_package)

        for takes_ref in takes_package_service.get_package_refs(takes_package):
            if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                take = list(self.get(req=req, lookup={'item_id': takes_ref[RESIDREF]}))[0]
                articles_to_kill.append(take)

    return articles_to_kill
def _find_other_sluglines(self, family_id, slugline, versioncreated, desk_id):
    """
    Given a family_id, return a tuple whose first value is True if there is a
    more recent story in the family, and whose second value is a list of any
    sluglines in the family that differ from the one passed.

    :param family_id: family id of the story
    :param slugline: slugline to compare against
    :param versioncreated: version creation time of the story
    :param desk_id: desk id to restrict the search to
    :return: a tuple as described above
    """
    older_sluglines = []
    req = ParsedRequest()
    query = {
        "query": {
            "filtered": {"filter": {"and": [{"term": {"family_id": family_id}},
                                            {"term": {"task.desk": desk_id}}]}}
        }
    }
    req.args = {"source": json.dumps(query), "aggregations": 0}
    family = superdesk.get_resource_service("published").get(req=req, lookup=None)
    for member in family:
        member_slugline = self._get_slugline_with_legal(member)
        if member_slugline.lower() != slugline.lower():
            if member.get("versioncreated") < versioncreated:
                if member_slugline not in older_sluglines:
                    older_sluglines.append(member_slugline)
            else:
                return (True, [])
    return (False, older_sluglines)
def get(self, req, lookup):
    """
    Return a list of items related to the given item. The given item id is
    retrieved from the lookup dictionary as 'item_id'
    """
    if 'item_id' not in lookup:
        raise SuperdeskApiError.badRequestError('The item identifier is required')
    item = get_resource_service('archive_autosave').find_one(req=None, _id=lookup['item_id'])
    if not item:
        item = get_resource_service('archive').find_one(req=None, _id=lookup['item_id'])
        if not item:
            raise SuperdeskApiError.notFoundError('Invalid item identifier')
    keywords = self.provider.get_keywords(self._transform(item))
    if not keywords:
        return ElasticCursor([])
    query = {
        'query': {
            'filtered': {
                'query': {
                    'query_string': {
                        'query': ' '.join(kwd['text'] for kwd in keywords)
                    }
                }
            }
        }
    }
    req = ParsedRequest()
    req.args = {'source': json.dumps(query), 'repo': 'archive,published,archived'}
    return get_resource_service('search').get(req=req, lookup=None)
def test_sub_resource_lookup(self): with self.app.app_context(): self.app.data.insert('items', [{'uri': 'foo', 'name': 'foo'}]) req = ParsedRequest() req.args = {} self.assertEqual(1, self.app.data.find('items', req, {'name': 'foo'}).count()) self.assertEqual(0, self.app.data.find('items', req, {'name': 'bar'}).count())
def get_published_items(): """ Get all items with queue state: "pending" that are not scheduled or scheduled time has lapsed. """ query = { QUEUE_STATE: PUBLISH_STATE.PENDING, '$or': [{ ITEM_STATE: { '$ne': CONTENT_STATE.SCHEDULED } }, { ITEM_STATE: CONTENT_STATE.SCHEDULED, '{}.utc_{}'.format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE): { '$lte': utcnow() } }] } request = ParsedRequest() request.sort = 'publish_sequence_no' request.max_results = 200 return list( get_resource_service(PUBLISHED).get_from_mongo(req=request, lookup=query))
def _get_changed_items(self, existing_items, package):
    """
    Returns the added and removed items from existing_items
    :param existing_items: Existing list
    :param package: the published package being compared
    :return: list of removed items and list of added items
    """
    published_service = get_resource_service('published')
    req = ParsedRequest()
    query = {'query': {'filtered': {'filter': {'and': [{'term': {QUEUE_STATE: PUBLISH_STATE.QUEUED}},
                                                       {'term': {'item_id': package['item_id']}}]}}},
             'sort': [{'publish_sequence_no': 'desc'}]}
    req.args = {'source': json.dumps(query)}
    req.max_results = 1000
    previously_published_packages = published_service.get(req=req, lookup=None)

    if not previously_published_packages.count():
        # nothing was published before, so nothing was added or removed
        return [], []

    previously_published_package = previously_published_packages[0]

    if 'groups' in previously_published_package:
        old_items = self.package_service.get_residrefs(previously_published_package)
        added_items = list(set(existing_items) - set(old_items))
        removed_items = list(set(old_items) - set(existing_items))
        return removed_items, added_items
    else:
        return [], []
def get(self, req, lookup):
    """
    Overriding to pass the session user as a search parameter
    """
    session_user = str(get_user_id(required=True))

    if not req:
        req = ParsedRequest()

    if lookup:
        req.where = json.dumps({'$or': [{'is_global': True}, {'user': session_user}, lookup]})
    else:
        req.where = json.dumps({'$or': [{'is_global': True}, {'user': session_user}]})

    return super().get(req, lookup=None)
def _get_items(self, resource, query, sort, keys, callback): req = ParsedRequest() cursor = get_resource_service(resource).get_from_mongo(req=req, lookup=query) count = cursor.count() no_of_buckets = len(range(0, count, self.default_page_size)) items = {} req.sort = sort for bucket in range(0, no_of_buckets): skip = bucket * self.default_page_size logger.info('Page : {}, skip: {}'.format(bucket + 1, skip)) cursor = get_resource_service(resource).get_from_mongo( req=req, lookup=query) cursor.skip(skip) cursor.limit(self.default_page_size) cursor = list(cursor) items.update({ callback(item): {key: item.get(key) for key in keys if key in item} for item in cursor }) return items
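# Hedged usage sketch for _get_items above: build an id -> field-subset map in
# pages of default_page_size; the resource, query, keys and callback are illustrative.
items = self._get_items(
    resource='archive',
    query={'type': 'text'},
    sort='_created',
    keys=('headline', '_updated'),
    callback=lambda item: item['_id'],
)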
def test_prefill_search_company(client, app): with app.test_request_context(): session['user'] = None search = SearchQuery() service.prefill_search_query(search) assert search.user is None assert search.company is None session['user'] = ADMIN_USER_ID search = SearchQuery() service.prefill_search_query(search) assert search.company.get('_id') == COMPANY_1 session['user'] = PUBLIC_USER_ID search = SearchQuery() service.prefill_search_query(search) assert search.company.get('_id') == COMPANY_2 session['user'] = ADMIN_USER_ID search = SearchQuery() req = ParsedRequest() req.args = {'user': TEST_USER_ID} service.prefill_search_query(search, req) assert search.company.get('_id') == COMPANY_3
def get_highlighted_items(highlights_id): """Get items marked for given highlight and passing date range query.""" highlight = get_resource_service('highlights').find_one(req=None, _id=highlights_id) query = { 'query': { 'filtered': { 'filter': { 'and': [ { 'range': { 'versioncreated': { 'gte': highlight.get('auto_insert', 'now/d') } } }, { 'term': { 'highlights': str(highlights_id) } }, ] } } }, 'sort': [ { 'versioncreated': 'desc' }, ], 'size': 200 } request = ParsedRequest() request.args = {'source': json.dumps(query), 'repo': 'archive,published'} return list(get_resource_service('search').get(req=request, lookup=None))
def get_last_published_version(self, _id):
    """Returns the last published entry for the passed item id.

    :param _id: item id
    :return: the last published version of the item, or None
    """
    try:
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {"must": [{"term": {"item_id": _id}}, {"term": {LAST_PUBLISHED_VERSION: True}}]}
                    }
                }
            }
        }
        request = ParsedRequest()
        request.args = {"source": json.dumps(query), "repo": "published"}
        items = list(self.get(req=request, lookup=None))
        if items:
            return items[0]
    except Exception:
        return None
def _get_subscribers_by_filter_condition(self, filter_condition):
    """
    Searches all subscribers that have a content filter with the given filter condition.
    If the filter condition is used in a global filter, it also returns all
    subscribers that have not disabled that global filter.
    :param filter_condition: filter condition to test
    :return: list of subscribers
    """
    req = ParsedRequest()
    all_subscribers = list(super().get(req=req, lookup=None))
    selected_products = {}
    selected_subscribers = {}
    selected_content_filters = {}

    filter_condition_service = get_resource_service('filter_conditions')
    content_filter_service = get_resource_service('content_filters')
    product_service = get_resource_service('products')

    existing_products = list(product_service.get(req=req, lookup=None))
    existing_filter_conditions = filter_condition_service.check_similar(filter_condition)
    for fc in existing_filter_conditions:
        existing_content_filters = content_filter_service.get_content_filters_by_filter_condition(fc['_id'])
        for pf in existing_content_filters:
            selected_content_filters[pf['_id']] = pf

            if pf.get('is_global', False):
                for s in all_subscribers:
                    gfs = s.get('global_filters', {})
                    if gfs.get(str(pf['_id']), True):
                        selected_subscribers[s['_id']] = s

            for product in existing_products:
                if product.get('content_filter') and \
                        'filter_id' in product['content_filter'] and \
                        product['content_filter']['filter_id'] == pf['_id']:
                    selected_products[product['_id']] = product

        for s in all_subscribers:
            for p in s.get('products', []):
                if p in selected_products:
                    selected_subscribers[s['_id']] = s

    res = {'filter_conditions': existing_filter_conditions,
           'content_filters': list(selected_content_filters.values()),
           'products': list(selected_products.values()),
           'selected_subscribers': list(selected_subscribers.values())}
    return [res]
def is_empty(self, resource):
    """
    Returns True if the collection is empty; False otherwise. While
    a user could rely on the self.find() method to achieve the same
    result, this method can probably take advantage of specific
    datastore features to provide better performance.

    Don't forget that a 'resource' could have a pre-defined filter. If
    that is the case, it will have to be taken into consideration when
    performing the is_empty() check (see the eve.io.mongo.mongo.py
    implementation).

    :param resource: resource being accessed. You should then use
                     the ``datasource`` helper function to retrieve
                     the actual datasource name.
    """
    resource_collection = self.find(resource, ParsedRequest(), None)
    return resource_collection.count() == 0
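# Minimal usage sketch for is_empty above; 'items' is an illustrative resource
# name and a Flask/Eve app context is assumed.
with app.app_context():
    if app.data.is_empty('items'):
        print('no items yet')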
def _get_subscribers_for_previously_sent_items(self, lookup):
    """
    Returns the list of subscribers that have previously received the item.

    :param dict lookup: elastic query to filter the publish queue
    :return: list of subscribers and dict of product codes keyed by subscriber
    """
    req = ParsedRequest()
    subscribers = []
    subscriber_codes = {}
    queued_items = list(get_resource_service('publish_queue').get(req=req, lookup=lookup))
    if len(queued_items) > 0:
        subscriber_ids = {queued_item['subscriber_id'] for queued_item in queued_items}
        subscriber_codes = {q['subscriber_id']: q.get('codes', []) for q in queued_items}
        query = {'$and': [{config.ID_FIELD: {'$in': list(subscriber_ids)}}]}
        subscribers = list(get_resource_service('subscribers').get(req=None, lookup=query))
    return subscribers, subscriber_codes
def get(self, req, lookup): resource_def = app.config['DOMAIN']['items'] id_field = versioned_id_field(resource_def) if req and req.args and req.args.get(config.ID_FIELD): version_history = list(super().get_from_mongo( req=ParsedRequest(), lookup={id_field: req.args.get(config.ID_FIELD)})) else: version_history = list(super().get_from_mongo(req=req, lookup=lookup)) for doc in version_history: doc[config.ID_FIELD] = doc[id_field] return ListCursor(version_history)
def test_removing_expired_items_from_elastic_only(self): now = utcnow() self.app.data.elastic.insert('ingest', [ { '_id': 'foo', 'expiry': now - timedelta(minutes=30) }, { '_id': 'bar', 'expiry': now + timedelta(minutes=30) }, ]) RemoveExpiredContent().run() self.assertEqual( 1, self.app.data.elastic.find('ingest', ParsedRequest(), {}).count())
def delete(self, endpoint_name, lookup):
    """Delete method to delete by using mongo query syntax.

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. ``{'_id':123}``,
        2. ``{'item_id': {'$in': [123, 234]}}``
    :returns: Returns list of ids which were removed.
    """
    docs = list(
        self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest()).sort("_id", 1))
    removed_ids = self.delete_docs(endpoint_name, docs)
    if len(docs) and not len(removed_ids):
        logger.warning(
            "No documents for %s resource were deleted using lookup %s",
            endpoint_name, lookup)
    return removed_ids
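# Hedged usage sketch for delete above; `data_layer` stands in for the instance
# that defines the method, and the endpoint name and lookup are illustrative,
# using the mongo query syntax from the docstring.
removed_ids = data_layer.delete('publish_queue', {'item_id': {'$in': [123, 234]}})
logger.info('removed %d documents', len(removed_ids))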
def test_prefill_search_products__requested_products(client, app): with app.test_request_context(): search = SearchQuery() service.prefill_search_query(search) assert search.requested_products == [] search = SearchQuery() req = ParsedRequest() req.args = {'requested_products': '{},{},{}'.format(PROD_1, PROD_2, PROD_3)} service.prefill_search_query(search, req) assert search.requested_products == [str(PROD_1), str(PROD_2), str(PROD_3)] search = SearchQuery() req = ParsedRequest() req.args = {'requested_products': [str(PROD_1), str(PROD_2), str(PROD_3)]} service.prefill_search_query(search, req) assert search.requested_products == [str(PROD_1), str(PROD_2), str(PROD_3)] search = SearchQuery() req = ParsedRequest() req.args = {'requested_products': {'test': PROD_3}} with raises(BadParameterValueError): service.prefill_search_query(search, req)
def _get_subscribers_for_previously_sent_items(self, lookup): """Returns list of subscribers that have previously received the item. :param dict lookup: elastic query to filter the publish queue :return: list of subscribers and list of product codes per subscriber """ req = ParsedRequest() subscribers = [] subscriber_codes = {} associations = {} queued_items = list( get_resource_service("publish_queue").get(req=req, lookup=lookup)) if len(queued_items) > 0: subscriber_ids = {} for queue_item in queued_items: subscriber_id = queue_item["subscriber_id"] if not subscriber_ids.get(subscriber_id): subscriber_ids[subscriber_id] = False if queue_item.get( "destination", {}).get("delivery_type") == "content_api": subscriber_ids[subscriber_id] = True subscriber_codes[subscriber_id] = queue_item.get("codes", []) if queue_item.get("associated_items"): associations[subscriber_id] = list( set(associations.get(subscriber_id, [])) | set(queue_item.get("associated_items", []))) query = { "$and": [{ config.ID_FIELD: { "$in": list(subscriber_ids.keys()) } }] } subscribers = list( get_resource_service("subscribers").get(req=None, lookup=query)) for s in subscribers: s["api_enabled"] = subscriber_ids.get(s.get(config.ID_FIELD)) return subscribers, subscriber_codes, associations
def search_repos(self, repo, args, page=1, page_size=None, projections=None): req = ParsedRequest() req.args = MultiDict() req.args['repo'] = repo req.args.update(args) if projections is not None: req.args['projections'] = json.dumps(projections) req.page = page req.max_results = page_size or self.default_page_size return self.get(req=req, lookup=None)
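# Hedged usage sketch for search_repos above; `service` stands in for the
# search service instance, the repo names match those used elsewhere in this
# file, and the args and projections are illustrative.
cursor = service.search_repos(
    'archive,published',
    {'q': 'flood'},
    page=1,
    page_size=50,
    projections=['headline', 'slugline'],
)
for item in cursor:
    print(item.get('headline'))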
def get(self, req, lookup): """ Version of an article in Legal Archive isn't maintained by Eve. Overriding this to fetch the version history. """ resource_def = app.config['DOMAIN'][LEGAL_ARCHIVE_NAME] id_field = versioned_id_field(resource_def) if req and req.args and req.args.get(config.ID_FIELD): version_history = list(super().get_from_mongo(req=ParsedRequest(), lookup={id_field: req.args.get(config.ID_FIELD)})) else: version_history = list(super().get_from_mongo(req=req, lookup=lookup)) for doc in version_history: doc[config.ID_FIELD] = doc[id_field] self.enhance(doc) return ListCursor(version_history)
def setUp(self): super().setUp() self.req = ParsedRequest() with self.app.test_request_context(URL_PREFIX): self.dictionaries = [{ '_id': '1', 'name': 'Eng', 'language_id': 'en' }, { '_id': '2', 'name': 'Eng AUs', 'language_id': 'en-AU', 'is_active': 'true' }, { '_id': '3', 'name': 'French', 'language_id': 'fr' }] self.app.data.insert('dictionaries', self.dictionaries)
def _search_events(self, request, params, query, search_filter): page = request.page or 1 page_size = self._get_page_size(request, search_filter) req = ParsedRequest() req.args = MultiDict() req.args['source'] = json.dumps({ 'query': query['query'], 'sort': query['sort'] if query.get('sort') else { 'dates.start': { 'order': 'asc' } }, 'size': page_size, 'from': (page - 1) * page_size }) req.args['repos'] = 'events' req.page = page req.max_results = page_size if params.get('projections'): req.args['projections'] = params['projections'] return get_resource_service('planning_search').get(req=req, lookup=None)
def delete(self, endpoint_name, lookup):
    """
    Delete method to delete by using mongo query syntax

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. {'_id':123}, 2. {'item_id': {'$in': [123, 234]}}
    :returns: Returns the mongo remove command response. {'n': 12, 'ok': 1}
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
    ids = [doc[config.ID_FIELD] for doc in docs]
    res = backend.remove(endpoint_name, {config.ID_FIELD: {'$in': ids}})
    if res and res.get('n', 0) > 0 and search_backend is not None:
        self._remove_documents_from_search_backend(endpoint_name, ids)
    if res and res.get('n', 0) == 0:
        logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
    return res
def find_one(self, req, **lookup): """Retrieve a specific item. :param req: object representing the HTTP request :type req: `eve.utils.ParsedRequest` :param dict lookup: requested item lookup, contains its ID :return: requested item (if found) :rtype: dict or None """ if req is None: req = ParsedRequest() allowed_params = {'include_fields', 'exclude_fields', 'version'} self._check_for_unknown_params( req, whitelist=allowed_params, allow_filtering=False) self._set_fields_filter(req) # Eve's "projection" return super().find_one(req, **lookup)
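# Hedged usage sketch for find_one above: the whitelisted parameter names come
# from the method itself, while `service` and the item id are illustrative.
req = ParsedRequest()
req.args = {'version': '2', 'include_fields': 'headline,body_text'}
item = service.find_one(req, _id='urn:example:item:1')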
def _filter_item(self, item):
    """
    Filter the item out if it matches any API Block filter conditions
    :param item:
    :return: True if the item is blocked, False if it is OK to publish it on the API.
    """
    # Get the API blocking Filters
    req = ParsedRequest()
    filter_service = get_resource_service('content_filters')
    filter_conditions = list(filter_service.get(req=req, lookup={'api_block': True}))

    # No API blocking filters
    if not filter_conditions:
        return False

    for fc in filter_conditions:
        if filter_service.does_match(fc, item):
            logger.info('API Filter block {} matched for item {}.'.format(fc, item.get(config.ID_FIELD)))
            return True

    return False
def test_no_force_refresh(self): with self.app.app_context(): self.app.config['ELASTICSEARCH_FORCE_REFRESH'] = False ids = self.app.data.insert('items', [ { 'uri': 'foo', 'name': 'foo' }, { 'uri': 'bar', 'name': 'bar' }, ]) item = self.app.data.find_one('items', req=None, _id=ids[0]) self.assertEqual('foo', item['uri']) time.sleep(2) req = ParsedRequest() cursor = self.app.data.find('items', req, None) self.assertEqual(2, cursor.count())
def _search_planning(self, request, params, query, search_filter):
    page = request.page or 1
    page_size = self._get_page_size(request, search_filter)
    req = ParsedRequest()
    req.args = MultiDict()
    req.args['source'] = json.dumps({
        'query': query['query'],
        'sort': query['sort'] if query.get('sort') else self._get_sort(),
        'size': page_size,
        'from': (page - 1) * page_size
    })
    req.args['repos'] = 'planning'
    req.page = page
    req.max_results = page_size

    if params.get('projections'):
        req.args['projections'] = params['projections']

    return get_resource_service('planning_search').get(req=req, lookup=None)
def test_query_sort_by_name_case_insensitive(self):
    service = get_resource_service('concept_items')
    names = [
        'A Message to Garcia',
        'and then there were none',
        'Bootstrap: Responsive Web Development',
        'Hobbit',
        'Lord of the rings',
        'the Elegance of the Hedgehog',
        'The Little Prince',
        'Гайдамаки'
    ]
    req = ParsedRequest()
    req.sort = 'name'
    req.args = ImmutableMultiDict([('collation', '{"locale": "en", "strength":"1"}')])
    cursor = service.get_from_mongo(req=req, lookup={})
    self.assertEqual([i['name'] for i in cursor], names)

    req = ParsedRequest()
    req.sort = '-name'
    req.args = ImmutableMultiDict([('collation', '{"locale": "en", "strength":"1"}')])
    names.reverse()
    cursor = service.get_from_mongo(req=req, lookup={})
    self.assertEqual([i['name'] for i in cursor], names)