def find(self, resource, lookup, projection, **options):
    """Fetch documents of *resource* matching *lookup* with *projection* applied.

    Builds a fresh ParsedRequest carrying only the projection and delegates
    to the data layer's get().
    """
    request = ParsedRequest()
    request.args = {}
    request.projection = projection
    return self.data_layer.get(resource, request, lookup)
def get(self, req, lookup):
    """Fetch from the ``archive_versions`` resource, defaulting to a blank request."""
    effective_req = ParsedRequest() if req is None else req
    return self.backend.get('archive_versions', req=effective_req, lookup=lookup)
def test_softdelete_datalayer(self):
    """Soft deleted items should not be returned by find methods in the
    Eve data layer unless show_deleted is explicitly configured in the
    request, the deleted field is included in the lookup, or the
    operation is 'raw'.
    """
    # Soft delete item
    r, status = self.delete(self.item_id_url, headers=self.etag_headers)
    self.assert204(status)
    with self.app.test_request_context():
        # find_one should only return item if a request w/ show_deleted ==
        # True is passed or if the deleted field is part of the lookup
        req = ParsedRequest()
        doc = self.app.data.find_one(self.known_resource, req, _id=self.item_id)
        self.assertEqual(doc, None)
        req.show_deleted = True
        doc = self.app.data.find_one(self.known_resource, req, _id=self.item_id)
        self.assertNotEqual(doc, None)
        self.assertEqual(doc.get(self.deleted_field), True)
        # deleted field in the lookup overrides show_deleted == False
        req.show_deleted = False
        doc = self.app.data.find_one(self.known_resource, req, _id=self.item_id, _deleted=True)
        self.assertNotEqual(doc, None)
        self.assertEqual(doc.get(self.deleted_field), True)
        # find_one_raw should always return a document, soft deleted or not
        doc = self.app.data.find_one_raw(self.known_resource, _id=ObjectId(self.item_id))
        self.assertNotEqual(doc, None)
        self.assertEqual(doc.get(self.deleted_field), True)
        # find should only return deleted items if a request with
        # show_deleted == True is passed or if the deleted field is part of
        # the lookup
        req.show_deleted = False
        docs = self.app.data.find(self.known_resource, req, None)
        undeleted_count = docs.count()
        req.show_deleted = True
        docs = self.app.data.find(self.known_resource, req, None)
        with_deleted_count = docs.count()
        # exactly one item was soft deleted above
        self.assertEqual(undeleted_count, with_deleted_count - 1)
        req.show_deleted = False
        docs = self.app.data.find(self.known_resource, req, {self.deleted_field: True})
        deleted_count = docs.count()
        self.assertEqual(deleted_count, 1)
        # find_list_of_ids will return deleted documents if given their id
        docs = self.app.data.find_list_of_ids(self.known_resource,
                                              [ObjectId(self.item_id)])
        self.assertEqual(docs.count(), 1)
def setUp(self):
    """Seed archive items, filter conditions and a content filter fixture.

    The articles cover the fields the filter conditions target:
    headline, urgency, flags, task/desk, embargo, genre, subject,
    anpa_category, body_html and place.
    """
    self.req = ParsedRequest()
    with self.app.test_request_context(self.app.config.get('URL_PREFIX')):
        self.articles = [{
            '_id': '1',
            'urgency': 1,
            'headline': 'story',
            'state': 'fetched'
        }, {
            '_id': '2',
            'headline': 'prtorque',
            'state': 'fetched'
        }, {
            '_id': '3',
            'urgency': 3,
            'state': 'fetched',
            'flags': {'marked_for_sms': True}
        }, {
            '_id': '4',
            'urgency': 4,
            'state': 'fetched',
            'task': {'desk': '1'},
            'ingest_provider': '1'
        }, {
            '_id': '5',
            'urgency': 2,
            'state': 'fetched',
            'task': {'desk': '2'},
            'priority': 3
        }, {
            # embargo still in the future (see utc_embargo offset)
            '_id': '6',
            'state': 'fetched',
            'embargo': utcnow(),
            'schedule_settings': {'utc_embargo': utcnow() + timedelta(minutes=20)}
        }, {
            '_id': '7',
            'genre': [{'name': 'Sidebar'}],
            'state': 'fetched'
        }, {
            '_id': '8',
            'subject': [{
                'name': 'adult education',
                'qcode': '05001000',
                'parent': '05000000'
            }, {
                'name': 'high schools',
                'qcode': '05005003',
                'parent': '05005000'
            }],
            'state': 'fetched'
        }, {
            '_id': '9',
            'state': 'fetched',
            'anpa_category': [{'qcode': 'a', 'name': 'Aus News'}]
        }, {
            # embargo already lapsed (utc_embargo in the past)
            '_id': '10',
            'body_html': '<p>Mention<p>',
            'embargo': utcnow(),
            'schedule_settings': {'utc_embargo': utcnow() - timedelta(minutes=20)}
        }, {
            '_id': '11',
            'place': [{'qcode': 'NSW', 'name': 'NSW'}],
            'state': 'fetched'
        }]
        self.app.data.insert('archive', self.articles)
        self.app.data.insert('filter_conditions', [{
            '_id': 1, 'field': 'headline', 'operator': 'like', 'value': 'tor', 'name': 'test-1'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 2, 'field': 'urgency', 'operator': 'in', 'value': '2', 'name': 'test-2'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 3, 'field': 'urgency', 'operator': 'in', 'value': '3,4,5', 'name': 'test-2'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 4, 'field': 'urgency', 'operator': 'nin', 'value': '1,2,3', 'name': 'test-2'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 5, 'field': 'urgency', 'operator': 'in', 'value': '2,5', 'name': 'test-2'
        }])
        self.app.data.insert(
            'content_filters', [{
                "_id": 1,
                "content_filter": [{"expression": {"fc": [1]}}],
                "name": "soccer-only"
            }])
def get(self, req, lookup):
    """Fetch from the ``tasks`` resource via the backend.

    A blank ParsedRequest is substituted when the caller passes None.
    """
    request = req
    if request is None:
        request = ParsedRequest()
    return self.backend.get('tasks', req=request, lookup=lookup)
def featured(self, req, lookup, featured):
    """Return featured items.

    :param ParsedRequest req: The parsed in request instance from the endpoint
    :param dict lookup: The parsed in lookup dictionary from the endpoint
    :param dict featured: list featured items
    """
    user = get_user()
    company = get_user_company(user)
    # events-only users may not see featured (planning-based) items at all
    if is_events_only_access(user, company):
        abort(403)
    if not featured or not featured.get('items'):
        return ListCursor([])
    query = _agenda_query()
    get_resource_service('section_filters').apply_section_filter(query, self.section)
    # restrict to agenda docs whose nested planning items are in the featured list
    planning_items_query = nested_query(
        'planning_items',
        {'bool': {'must': [{'terms': {'planning_items.guid': featured['items']}}]}},
        name='featured'
    )
    if req.args.get('q'):
        # free-text search applies both to the top-level doc and the nested items
        query['bool']['must'].append(query_string(req.args['q']))
        planning_items_query['nested']['query']['bool']['must'].append(planning_items_query_string(req.args['q']))
    query['bool']['must'].append(planning_items_query)
    source = {'query': query}
    set_post_filter(source, req)
    source['size'] = len(featured['items'])
    source['from'] = req.args.get('from', 0, type=int)
    # aggregations only on the first page
    if not source['from']:
        source['aggs'] = aggregations
    if company and not is_admin(user) and company.get('events_only', False):
        # no adhoc planning items and remove planning items and coverages fields
        query['bool']['must'].append({'exists': {'field': 'event'}})
        _remove_fields(source, PLANNING_ITEMS_FIELDS)
    internal_req = ParsedRequest()
    internal_req.args = {'source': json.dumps(source)}
    cursor = self.internal_get(internal_req, lookup)
    # index returned docs by the guid of each of their planning items
    docs_by_id = {}
    for doc in cursor.docs:
        for p in (doc.get('planning_items') or []):
            docs_by_id[p.get('guid')] = doc
        # make the items display on the featured day,
        # it's used in ui instead of dates.start and dates.end
        doc.update({
            '_display_from': featured['display_from'],
            '_display_to': featured['display_to'],
        })
    # emit docs in the order given by featured['items'], deduplicated by _id
    docs = []
    agenda_ids = set()
    for _id in featured['items']:
        if docs_by_id.get(_id) and docs_by_id.get(_id).get('_id') not in agenda_ids:
            docs.append(docs_by_id.get(_id))
            agenda_ids.add(docs_by_id.get(_id).get('_id'))
    cursor.docs = docs
    return cursor
def traffic_story(item, **kwargs):
    """Render current traffic incidents and roadworks into the item body.

    Builds per-area Mongo geo queries against the ``traffic_incidents``
    collection for the state taken from the item's place, renders the
    results into the item's body_html template, and auto-publishes when
    run from a scheduled template (desk/stage in kwargs).
    """
    # The place is used to determine the state the requests will be limited to
    if 'place' in item and len(item.get('place')):
        state = item.get('place')[0].get('qcode').upper()
    else:
        # no place, nothing to do
        return
    # Current time in UTC to restrict the query to incidents that are currently active
    today = utcnow()
    # Also include incidents that started in the last 24 hours
    yesterday = today - timedelta(hours=24)
    service = get_resource_service('traffic_incidents')
    req = ParsedRequest()
    areas = get_areas().get('features')
    incidents_map = dict()
    incidents_html = ''
    roadworks_html = ''
    # Scan the areas in the state.
    for area in [a for a in areas if a.get('properties', {}).get('state', '').upper() == state]:
        # active, non-test incidents intersecting this area's polygon
        base_query = {
            '$and': [{
                'state': state
            }, {
                'end_date': {'$gt': today}
            }, {
                'start_date': {'$lt': today, '$gt': yesterday}
            }, {
                'incident_type': {'$ne': 'This message is for test purposes only , please ignore'}
            }, {
                'geometry': {
                    '$geoIntersects': {
                        '$geometry': {
                            'type': 'Polygon',
                            'coordinates': area.get('geometry').get('coordinates')
                        }
                    }
                }
            }]
        }
        incident_query = deepcopy(base_query)
        # Append clauses that will exclude road works
        incident_query['$and'] += [{
            'incident_type': {'$ne': 'Roadworks'}
        }, {
            'incident_description': {'$not': re.compile('.*{}.*'.format('Maintenance work'), re.IGNORECASE)}
        }, {
            'incident_description': {'$not': re.compile('.*{}.*'.format('Roadworks'), re.IGNORECASE)}
        }]
        # Attempt to remove the reporting of apparently permanently closed roads in Brisbane and Perth
        if state == 'WA':
            incident_query['$and'].append({
                'incident_description': {
                    '$ne': 'Closed on Brearley Avenue Eastbound in '
                           'Perth between Great Eastern '
                           'Highway and Second Street.'
                }
            })
            incident_query['$and'].append({
                'incident_description': {
                    '$ne': 'Closed on Brearley Avenue Westbound in '
                           'Perth between Second Street and Great '
                           'Eastern Highway.'
                }
            })
        if state == 'QLD':
            incident_query['$and'].append({
                'incident_description': {
                    '$ne': 'Entry slip road closed on Sandgate Road '
                           'Northbound in Brisbane between Holroyd '
                           'Street and Gateway Motorway.'
                }
            })
            incident_query['$and'].append({
                'incident_description': {
                    '$ne': 'Closed on William Street '
                           'Northbound in Brisbane '
                           'between Margaret Street and Elizabeth Street.'
                }
            })
            incident_query['$and'].append({
                'incident_description': {
                    '$ne': 'Closed on William Street South '
                           'Bound in Brisbane '
                           'between Elizabeth Street and Margaret Street.'
                }
            })
        incidents = service.get_from_mongo(req=req, lookup=incident_query)
        if incidents.count():
            incidents_html += '<p><b>{}</b></p>'.format(area['properties']['area'])
            for i in incidents:
                # normalise supplier wording before rendering
                message = i.get('incident_description').replace('lorr(y/ies)', 'truck')
                message = message.replace('Accident(s)', 'Accident')
                incidents_html += '<p>{}</p>'.format(message)
        roadworks_query = deepcopy(base_query)
        # Append a clause that restrict to roadworks only
        roadworks_query['$and'].append({
            '$or': [{
                'incident_type': 'Roadworks'
            }, {
                'incident_description': re.compile('.*{}.*'.format('Maintenance work'), re.IGNORECASE)
            }, {
                'incident_description': re.compile('.*{}.*'.format('Roadworks'), re.IGNORECASE)
            }]
        })
        roadworks = service.get_from_mongo(req=req, lookup=roadworks_query)
        if roadworks.count():
            roadworks_html += '<p><b>{}</b></p>'.format(area['properties']['area'])
            for i in roadworks:
                roadworks_html += '<p>{}</p>'.format(i.get('incident_description'))
    # fall back to placeholder text when nothing was found
    incidents_map['incidents'] = 'No incidents at this time.' if incidents_html == '' else incidents_html
    incidents_map['roadworks'] = 'No roadworks at this time.' if roadworks_html == '' else roadworks_html
    item['body_html'] = render_template_string(item.get('body_html', ''), **incidents_map)
    update = {'source': 'Intelematics'}
    ingest_provider = get_resource_service('ingest_providers').find_one(req=None, source='Intelematics')
    if ingest_provider:
        update['ingest_provider'] = ingest_provider.get(config.ID_FIELD)
    update['body_html'] = item['body_html']
    get_resource_service('archive').system_update(item[config.ID_FIELD], update, item)
    item['source'] = 'Intelematics'
    # If the macro is being executed by a scheduled template then publish the item as well
    if 'desk' in kwargs and 'stage' in kwargs:
        get_resource_service('archive_publish').patch(
            id=item[config.ID_FIELD],
            updates={ITEM_STATE: CONTENT_STATE.PUBLISHED, 'auto_publish': True})
        return get_resource_service('archive').find_one(req=None, _id=item[config.ID_FIELD])
    return item
def _get_field_values(self):
    """Collect selectable values per field for filter condition editing.

    Returns a dict keyed by field name (anpa_category, genre, urgency,
    priority, type, subject, desk, stage, sms, embargo, place,
    ingest_provider, featuremedia) mapping to the available options,
    sourced mostly from the vocabularies service.
    """
    values = {}
    vocabularies_resource = get_resource_service('vocabularies')
    values['anpa_category'] = vocabularies_resource.find_one(req=None, _id='categories')['items']
    # genre can be defined either by schema_field or by its vocabulary _id
    req = ParsedRequest()
    req.where = json.dumps({'$or': [{"schema_field": "genre"}, {"_id": "genre"}]})
    genre = vocabularies_resource.get(req=req, lookup=None)
    if genre.count():
        values['genre'] = genre[0]['items']
    for voc_id in ('urgency', 'priority', 'type'):
        try:
            values[voc_id] = vocabularies_resource.find_one(req=None, _id=voc_id)['items']
        except TypeError:
            # find_one returned None (vocabulary missing) -> no options
            values[voc_id] = []
    subject = vocabularies_resource.find_one(req=None, schema_field='subject')
    if subject:
        values['subject'] = subject['items']
    else:
        values['subject'] = get_subjectcodeitems()
    values['desk'] = list(get_resource_service('desks').get(None, {}))
    values['stage'] = self._get_stage_field_values(values['desk'])
    # boolean-like fields are exposed as qcode 0/1 options
    values['sms'] = [{'qcode': 0, 'name': 'False'}, {'qcode': 1, 'name': 'True'}]
    values['embargo'] = [{'qcode': 0, 'name': 'False'}, {'qcode': 1, 'name': 'True'}]
    # place may live under schema_field 'place' or vocabularies 'place'/'locators'
    req = ParsedRequest()
    req.where = json.dumps({'$or': [{"schema_field": "place"}, {"_id": "place"}, {"_id": "locators"}]})
    place = vocabularies_resource.get(req=req, lookup=None)
    if place.count():
        values['place'] = place[0]['items']
    else:
        values['place'] = []
    values['ingest_provider'] = list(get_resource_service('ingest_providers').get(None, {}))
    values['featuremedia'] = [{'qcode': 1, 'name': 'True'}, {'qcode': 0, 'name': 'False'}]
    return values
def setUp(self):
    """Seed archive items, vocabularies, filter conditions, content
    filters, products, subscribers and a routing scheme for the
    content-filter tests.
    """
    self.req = ParsedRequest()
    with self.app.test_request_context(self.app.config.get("URL_PREFIX")):
        self.f = ContentFilterService(datasource="content_filters", backend=get_backend())
        self.s = SubscribersService(datasource="subscribers", backend=get_backend())
        # articles exercising headline/urgency plus custom vocabulary and
        # custom text fields targeted by the filter conditions below
        self.articles = [
            {"_id": "1", "urgency": 1, "headline": "story", "state": "fetched"},
            {"_id": "2", "headline": "prtorque", "state": "fetched"},
            {"_id": "3", "urgency": 3, "headline": "creator", "state": "fetched"},
            {"_id": "4", "urgency": 4, "state": "fetched"},
            {"_id": "5", "urgency": 2, "state": "fetched"},
            {"_id": "6", "state": "fetched"},
            {"_id": "7", "subject": [{"scheme": "my_vocabulary", "qcode": "MV:01"}]},
            {"_id": "8", "extra": {"custom_text": "my text"}},
        ]
        self.app.data.insert("archive", self.articles)
        self.app.data.insert(
            "vocabularies",
            [
                {
                    "_id": "my_vocabulary",
                    "display_name": "My Vocabulary",
                    "type": "manageable",
                    "field_type": None,
                    "schema": {"name": {}, "qcode": {}, "parent": {}},
                    "items": [{"name": "option 1", "qcode": "MV:01", "is_active": True}],
                },
                {"_id": "custom_text", "display_name": "Custom Text", "type": "manageable", "field_type": "text"},
            ],
        )
        self.app.data.insert(
            "filter_conditions",
            [{"_id": 1, "field": "headline", "operator": "like", "value": "tor", "name": "test-1"}],
        )
        self.app.data.insert("filter_conditions", [{
            "_id": 2, "field": "urgency", "operator": "in", "value": "2", "name": "test-2"
        }])
        self.app.data.insert(
            "filter_conditions",
            [{"_id": 3, "field": "headline", "operator": "endswith", "value": "tor", "name": "test-3"}],
        )
        self.app.data.insert(
            "filter_conditions",
            [{"_id": 4, "field": "urgency", "operator": "in", "value": "2,3,4", "name": "test-4"}],
        )
        self.app.data.insert(
            "filter_conditions",
            [{"_id": 5, "field": "headline", "operator": "startswith", "value": "sto", "name": "test-5"}],
        )
        self.app.data.insert(
            "filter_conditions",
            [{"_id": 6, "field": "my_vocabulary", "operator": "in", "value": "MV:01", "name": "test-6"}],
        )
        self.app.data.insert(
            "filter_conditions",
            [{"_id": 7, "field": "custom_text", "operator": "eq", "value": "my text", "name": "test-7"}],
        )
        self.app.data.insert(
            "content_filters",
            [{"_id": 1, "content_filter": [{"expression": {"fc": [1]}}], "name": "soccer-only"}])
        self.app.data.insert(
            "content_filters",
            [{"_id": 2, "content_filter": [{"expression": {"fc": [4, 3]}}], "name": "soccer-only2"}],
        )
        self.app.data.insert(
            "content_filters",
            [{"_id": 3, "content_filter": [{"expression": {"pf": [1], "fc": [2]}}], "name": "soccer-only3"}],
        )
        self.app.data.insert(
            "content_filters",
            [{
                "_id": 4,
                "content_filter": [{"expression": {"fc": [3]}}, {"expression": {"fc": [5]}}],
                "name": "soccer-only4",
            }],
        )
        self.app.data.insert(
            "content_filters",
            [{"_id": 5, "content_filter": [{"expression": {"fc": [6]}}], "name": "my-vocabulary"}],
        )
        self.app.data.insert(
            "content_filters",
            [{"_id": 6, "content_filter": [{"expression": {"fc": [7]}}], "name": "custom-text"}])
        self.app.data.insert("products", [{
            "_id": 1, "content_filter": {"filter_id": 3, "filter_type": "blocking"}, "name": "p-1"
        }])
        self.app.data.insert("products", [{
            "_id": 2, "content_filter": {"filter_id": 1, "filter_type": "blocking"}, "name": "p-2"
        }])
        self.app.data.insert("subscribers", [{"_id": 1, "products": [1], "name": "sub1"}])
        self.app.data.insert("subscribers", [{"_id": 2, "products": [2], "name": "sub2"}])
        self.app.data.insert(
            "routing_schemes",
            [{
                "_id": 1,
                "name": "routing_scheme_1",
                "rules": [{
                    "filter": 4,
                    "name": "routing_rule_4",
                    "schedule": {
                        "day_of_week": ["MON"],
                        "hour_of_day_from": "0000",
                        "hour_of_day_to": "2355",
                    },
                    "actions": {"fetch": [], "publish": [], "exit": False},
                }],
            }],
        )
def am_fronters(item, **kwargs):
    """Compose a digest of today's newspaper front pages into the item body.

    For each known paper, searches the published repo for today's
    "Main stories in <paper>" article, extracts its PAGE 1/SPORT
    paragraphs and appends them to the body. When run from a scheduled
    template (desk/stage in kwargs) the item is auto-published.
    """
    now = datetime.now()
    item['abstract'] = '<p>Main stories in Australia\'s newspapers, published on {}</p>'.format(
        now.strftime('%B %-d, %Y'))
    body = StringIO()
    body.write('<p>(Not for publication, this is a guide only.)<br></p>')
    # heading is what we render; name is what the source article headline uses
    papers = [{
        'heading': 'THE AUSTRALIAN', 'name': 'The Australian'
    }, {
        'heading': 'THE FINANCIAL REVIEW', 'name': 'The Financial Review'
    }, {
        'heading': 'SYDNEY MORNING HERALD', 'name': 'The Sydney Morning Herald'
    }, {
        'heading': 'THE DAILY TELEGRAPH', 'name': 'The Daily Telegraph'
    }, {
        'heading': 'THE AGE', 'name': 'The Age'
    }, {
        'heading': 'THE HERALD SUN', 'name': 'The Herald Sun'
    }, {
        'heading': 'THE COURIER-MAIL', 'name': 'The Courier-Mail'
    }, {
        'heading': 'THE ADVERTISER', 'name': 'The Advertiser'
    }, {
        'heading': 'THE MERCURY', 'name': 'The Mercury'
    }, {
        'heading': 'WEST AUSTRALIAN', 'name': 'The West Australian'
    }, {
        'heading': 'CANBERRA TIMES', 'name': 'The Canberra Times'
    }, {
        'heading': 'NT NEWS', 'name': 'The NT News'
    }]
    for paper in papers:
        try:
            service = get_resource_service('published')
            req = ParsedRequest()
            # newest matching article in category 'v' for this paper
            query = {
                "query": {
                    "filtered": {
                        "query": {
                            "query_string": {
                                "query": "headline:(\"Main+stories+in+{}\")".format(
                                    paper.get('name').replace(' ', '+'))
                            }
                        },
                        "filter": {
                            "and": [{"term": {"anpa_category.qcode": "v"}}]
                        }
                    }
                }
            }
            req.sort = '[("_created", -1)]'
            req.args = {'source': json.dumps(query)}
            req.max_results = 1
            articles = service.get(req=req, lookup=None)
            if articles.count():
                article = articles[0]
                # Check that the article is for today, check day month
                if now.strftime('%B') in article.get('abstract') and now.strftime('%-d') in article.get('abstract'):
                    body.write('<p>{}</p>'.format(paper.get('heading')))
                    tree = html.fromstring(article.get('body_html'))
                    pars = tree.xpath('./p')
                    for par in pars:
                        if par.text and par.text.startswith('PAGE 1:'):
                            # long enough -> the story text is in this paragraph
                            if len(par.text) > len('PAGE 1:') + 20:
                                body.write('<p>{}</p>'.format(par.text))
                            # otherwise the text is in the following paragraph
                            elif par.getnext() is not None:
                                body.write('<p>PAGE 1: {}</p>'.format(par.getnext().text))
                            else:
                                body.write('<p>PAGE 1: {}</p>'.format(par.text.replace('PAGE 1: ', '')))
                            continue
                        if par.text and par.text.startswith('SPORT:'):
                            if len(par.text) > len('SPORT:') + 20:
                                body.write('<p>SPORT: {}</p>'.format(par.text.replace('SPORT: ', '')))
                            elif par.getnext() is not None:
                                body.write('<p>SPORT: {}</p>'.format(par.getnext().text))
                            else:
                                body.write('<p>SPORT: {}</p>'.format(par.text.replace('SPORT: ', '')))
                            continue
                else:
                    print('Todays fronter story for {} was not found'.format(paper))
                    logger.warning('Todays fronter story for {} was not found'.format(paper))
            else:
                print('Fronter story for {} was not found'.format(paper))
                logger.warning('Fronter story for {} was not found'.format(paper))
        except Exception as e:
            # a failing paper must not abort the whole digest
            logger.warning('Fronter story for {} raised exception: {}'.format(paper, e))
            pass
    item['body_html'] = body.getvalue()
    body.close()
    # If the macro is being executed by a scheduled template then publish the item as well
    if 'desk' in kwargs and 'stage' in kwargs:
        update = {'body_html': item.get('body_html', '')}
        get_resource_service('archive').system_update(item[config.ID_FIELD], update, item)
        get_resource_service('archive_publish').patch(
            id=item[config.ID_FIELD],
            updates={ITEM_STATE: CONTENT_STATE.PUBLISHED, 'auto_publish': True})
        return get_resource_service('archive').find_one(req=None, _id=item[config.ID_FIELD])
    return item
def check_if_file_already_imported(key):
    """Return True when an archive item matching *key* has already been imported."""
    request = ParsedRequest()
    request.args = {'filter': get_query_for_already_imported(key)}
    matches = superdesk.get_resource_service('archive').get(request, None)
    return matches.count() > 0
def get_items(self, query):
    """Return the result of the item search by the given query

    Runs *query* against the 'published' repo via the search service.
    """
    search_req = ParsedRequest()
    search_req.args = {
        'source': json.dumps(query),
        'repo': 'published',
    }
    return get_resource_service('search').get(req=search_req, lookup=None)
def get_expired_items(self, now):
    """Return a capped cursor (25 max) over archive items expired as of *now*."""
    expiry_filter = self.get_query_for_expired_items(now)
    request = ParsedRequest()
    request.max_results = 25
    request.args = {'filter': expiry_filter}
    return superdesk.get_resource_service('archive').get(request, None)
def validate_delete_action(self, doc, allow_all_types=False):
    """Runs on delete of archive item.

    Overriding to validate the item being killed is actually eligible for kill. Validates the following:
        1. Is item of type Text?
        2. Is item a Broadcast Script?
        3. Does item acts as a Master Story for any of the existing broadcasts?
        4. Is item available in production or part of a normal package?
        5. Is the associated Digital Story is available in production or part of normal package?
        6. If item is a Take then is any take available in production or part of normal package?

    :param doc: represents the article in archived collection
    :type doc: dict
    :param allow_all_types: represents if different types of documents are allowed to be killed
    :type doc: bool
    :raises SuperdeskApiError.badRequestError() if any of the above validation conditions fail.
    """
    bad_req_error = SuperdeskApiError.badRequestError
    # Temporarily swap _id and item_id so the lookups below run against
    # the production identifier; the swap is reverted at the end.
    id_field = doc[config.ID_FIELD]
    item_id = doc['item_id']
    doc['item_id'] = id_field
    doc[config.ID_FIELD] = item_id
    if not allow_all_types and doc[ITEM_TYPE] != CONTENT_TYPE.TEXT:
        raise bad_req_error(message=_('Only Text articles are allowed to be Killed in Archived repo'))
    if is_genre(doc, BROADCAST_GENRE):
        raise bad_req_error(message=_("Killing of Broadcast Items isn't allowed in Archived repo"))
    if get_resource_service('archive_broadcast').get_broadcast_items_from_master_story(doc, True):
        raise bad_req_error(message=_("Can't kill as this article acts as a Master Story for existing broadcast(s)"))
    if get_resource_service(ARCHIVE).find_one(req=None, _id=doc[GUID_FIELD]):
        raise bad_req_error(message=_("Can't Kill as article is still available in production"))
    if not allow_all_types and is_item_in_package(doc):
        raise bad_req_error(message=_("Can't kill as article is part of a Package"))
    takes_package_id = self._get_take_package_id(doc)
    if takes_package_id:
        if get_resource_service(ARCHIVE).find_one(req=None, _id=takes_package_id):
            raise bad_req_error(message=_("Can't Kill as the Digital Story is still available in production"))
        # newest archived version of the takes package first
        req = ParsedRequest()
        req.sort = '[("%s", -1)]' % config.VERSION
        takes_package = list(self.get(req=req, lookup={'item_id': takes_package_id}))
        if not takes_package:
            raise bad_req_error(message=_('Digital Story of the article not found in Archived repo'))
        takes_package = takes_package[0]
        if not allow_all_types and is_item_in_package(takes_package):
            raise bad_req_error(message=_("Can't kill as Digital Story is part of a Package"))
        # validate every sibling take referenced by the takes package
        for takes_ref in self._get_package_refs(takes_package):
            if takes_ref[RESIDREF] != doc[GUID_FIELD]:
                if get_resource_service(ARCHIVE).find_one(req=None, _id=takes_ref[RESIDREF]):
                    raise bad_req_error(message=_("Can't Kill as Take(s) are still available in production"))
                take = list(self.get(req=None, lookup={'item_id': takes_ref[RESIDREF]}))
                if not take:
                    raise bad_req_error(message=_('One of Take(s) not found in Archived repo'))
                if not allow_all_types and is_item_in_package(take[0]):
                    raise bad_req_error(message=_("Can't kill as one of Take(s) is part of a Package"))
    # restore the swapped identifiers
    doc['item_id'] = item_id
    doc[config.ID_FIELD] = id_field
def test_it_excludes_published_content(self):
    """The archive-repo search should return exactly one (unpublished) doc."""
    with self.app.app_context():
        req = ParsedRequest()
        req.args = {"repo": "archive"}
        # find() returns a tuple; index 0 is the result cursor
        docs = self.app.data.find("search", req, None)[0]
        # was assertEquals: a deprecated alias removed in Python 3.12 --
        # use the canonical assertEqual
        self.assertEqual(1, docs.count())
def setUp(self):
    """Seed archive items, filter conditions, content filters, products,
    subscribers and a routing scheme for the content-filter tests.
    """
    self.req = ParsedRequest()
    with self.app.test_request_context(self.app.config.get('URL_PREFIX')):
        self.f = ContentFilterService(datasource='content_filters', backend=get_backend())
        self.s = SubscribersService(datasource='subscribers', backend=get_backend())
        # articles exercising the headline/urgency fields targeted below
        self.articles = [{
            '_id': '1', 'urgency': 1, 'headline': 'story', 'state': 'fetched'
        }, {
            '_id': '2', 'headline': 'prtorque', 'state': 'fetched'
        }, {
            '_id': '3', 'urgency': 3, 'headline': 'creator', 'state': 'fetched'
        }, {
            '_id': '4', 'urgency': 4, 'state': 'fetched'
        }, {
            '_id': '5', 'urgency': 2, 'state': 'fetched'
        }, {
            '_id': '6', 'state': 'fetched'
        }]
        self.app.data.insert('archive', self.articles)
        self.app.data.insert('filter_conditions', [{
            '_id': 1, 'field': 'headline', 'operator': 'like', 'value': 'tor', 'name': 'test-1'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 2, 'field': 'urgency', 'operator': 'in', 'value': '2', 'name': 'test-2'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 3, 'field': 'headline', 'operator': 'endswith', 'value': 'tor', 'name': 'test-3'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 4, 'field': 'urgency', 'operator': 'in', 'value': '2,3,4', 'name': 'test-4'
        }])
        self.app.data.insert('filter_conditions', [{
            '_id': 5, 'field': 'headline', 'operator': 'startswith', 'value': 'sto', 'name': 'test-5'
        }])
        self.app.data.insert(
            'content_filters', [{
                "_id": 1, "content_filter": [{"expression": {"fc": [1]}}], "name": "soccer-only"
            }])
        self.app.data.insert(
            'content_filters', [{
                "_id": 2, "content_filter": [{"expression": {"fc": [4, 3]}}], "name": "soccer-only2"
            }])
        self.app.data.insert(
            'content_filters', [{
                "_id": 3, "content_filter": [{"expression": {"pf": [1], "fc": [2]}}], "name": "soccer-only3"
            }])
        self.app.data.insert('content_filters', [{
            "_id": 4,
            "content_filter": [{"expression": {"fc": [3]}}, {"expression": {"fc": [5]}}],
            "name": "soccer-only4"
        }])
        self.app.data.insert('products', [{
            "_id": 1, "content_filter": {"filter_id": 3, "filter_type": "blocking"}, "name": "p-1"
        }])
        self.app.data.insert('products', [{
            "_id": 2, "content_filter": {"filter_id": 1, "filter_type": "blocking"}, "name": "p-2"
        }])
        self.app.data.insert('subscribers', [{"_id": 1, "products": [1], "name": "sub1"}])
        self.app.data.insert('subscribers', [{"_id": 2, "products": [2], "name": "sub2"}])
        self.app.data.insert('routing_schemes', [{
            "_id": 1,
            "name": "routing_scheme_1",
            "rules": [{
                "filter": 4,
                "name": "routing_rule_4",
                "schedule": {
                    "day_of_week": ["MON"],
                    "hour_of_day_from": "0000",
                    "hour_of_day_to": "2355",
                },
                "actions": {"fetch": [], "publish": [], "exit": False}
            }]
        }])
def test_it_excludes_published_content(self):
    """No document returned by an archive-repo search may be in the published state."""
    with self.app.app_context():
        request = ParsedRequest()
        request.args = {"repo": "archive"}
        cursor = self.app.data.find("search", request, None)[0]
        states = [item["state"] for item in cursor]
        self.assertNotIn(CONTENT_STATE.PUBLISHED, states)
def get_from_mongo(self, req, lookup, projection=None):
    """Query mongo directly for this datasource.

    Defaults the request when None, and applies *projection* only when
    the request does not already carry one.
    """
    request = ParsedRequest() if req is None else req
    if projection and not request.projection:
        request.projection = json.dumps(projection)
    return self.backend.get_from_mongo(self.datasource, req=request, lookup=lookup)
def filter_subscribers(self, doc, subscribers, target_media_type):
    """Filter subscribers to whom the current document is going to be delivered.

    :param doc: Document to publish/kill/correct
    :param subscribers: List of Subscribers that might potentially get this document
    :param target_media_type: dictate if the doc being queued is a Takes Package or an Individual Article.
        Valid values are - Wire, Digital. If Digital then the doc being queued is a Takes Package
        and if Wire then the doc being queues is an Individual Article.
    :return: List of of filtered subscribers and list of product codes per subscriber.
    """
    filtered_subscribers = []
    subscriber_codes = {}
    req = ParsedRequest()
    req.args = {'is_global': True}
    filter_service = get_resource_service('content_filters')
    # index products by id for the per-subscriber validation below
    existing_products = {
        p[config.ID_FIELD]: p
        for p in list(get_resource_service('products').get(req=req, lookup=None))
    }
    global_filters = list(filter_service.get(req=req, lookup=None))
    # apply global filters
    self.conforms_global_filter(global_filters, doc)
    for subscriber in subscribers:
        # wire subscribers don't get takes packages, digital ones only get them
        if target_media_type and subscriber.get('subscriber_type', '') != SUBSCRIBER_TYPES.ALL:
            can_send_takes_packages = subscriber['subscriber_type'] == SUBSCRIBER_TYPES.DIGITAL
            if target_media_type == SUBSCRIBER_TYPES.WIRE and can_send_takes_packages or \
                    target_media_type == SUBSCRIBER_TYPES.DIGITAL and not can_send_takes_packages:
                continue
        conforms, skip_filters = self.conforms_subscriber_targets(subscriber, doc)
        if not conforms:
            continue
        if not self.conforms_subscriber_global_filter(subscriber, global_filters):
            continue
        product_codes = self._get_codes(subscriber)
        subscriber_added = False
        subscriber['api_enabled'] = False
        # validate against direct products
        result, codes = self._validate_article_for_subscriber(
            doc, subscriber.get('products'), existing_products)
        if result:
            product_codes.extend(codes)
            if not subscriber_added:
                filtered_subscribers.append(subscriber)
                subscriber_added = True
        if content_api.is_enabled():
            # validate against api products
            result, codes = self._validate_article_for_subscriber(
                doc, subscriber.get('api_products'), existing_products)
            if result:
                product_codes.extend(codes)
                subscriber['api_enabled'] = True
                if not subscriber_added:
                    filtered_subscribers.append(subscriber)
                    subscriber_added = True
        if skip_filters and not subscriber_added:
            # if targeted subscriber and has api products then send it to api.
            if subscriber.get('api_products'):
                subscriber['api_enabled'] = True
            filtered_subscribers.append(subscriber)
            subscriber_added = True
        # unify the list of codes by removing duplicates
        if subscriber_added:
            subscriber_codes[subscriber[config.ID_FIELD]] = list(set(product_codes))
    return filtered_subscribers, subscriber_codes
def get_data_version_relation_document(data_relation, reference, latest=False):
    """ Returns document at the version specified in data_relation, or at
    the latest version if passed `latest=True`. Returns None if
    data_relation cannot be satisfied.

    :param data_relation: the schema definition describing the data_relation.
    :param reference: a dictionary with a value_field and a version_field.
    :param latest: if we should obey the version param in reference or not.

    .. versionadded:: 0.4
    """
    value_field = data_relation["field"]
    version_field = app.config["VERSION"]
    collection = data_relation["resource"]
    versioned_collection = collection + config.VERSIONS
    resource_def = app.config["DOMAIN"][data_relation["resource"]]
    id_field = resource_def["id_field"]

    # Fetch document data at the referenced version
    query = {version_field: reference[version_field]}
    if value_field == id_field:
        # Versioned documents store the primary id in a different field
        query[versioned_id_field(resource_def)] = reference[value_field]
    elif value_field not in versioned_fields(resource_def):
        # The relation value field is unversioned, and will not be present in
        # the versioned collection. Need to find id field for version query
        req = ParsedRequest()
        if resource_def["soft_delete"]:
            req.show_deleted = True
        latest_version = app.data.find_one(
            collection, req, **{value_field: reference[value_field]})
        if not latest_version:
            return None
        query[versioned_id_field(resource_def)] = latest_version[id_field]
    else:
        # Field will be present in the versioned collection
        query[value_field] = reference[value_field]
    referenced_version = app.data.find_one(versioned_collection, None, **query)

    # support late versioning
    if referenced_version is None and reference[version_field] == 1:
        # there is a chance this document hasn't been saved
        # since versioning was turned on
        referenced_version = missing_version_field(data_relation, reference)
        return referenced_version  # v1 is both referenced and latest

    if referenced_version is None:
        return None  # The referenced document version was not found

    # Fetch the latest version of this document to use in version synthesis
    query = {id_field: referenced_version[versioned_id_field(resource_def)]}
    req = ParsedRequest()
    if resource_def["soft_delete"]:
        # Still return latest after soft delete. It is needed to synthesize
        # full document version.
        req.show_deleted = True
    latest_version = app.data.find_one(collection, req, **query)
    if latest is True:
        return latest_version

    # Synthesize referenced version from latest and versioned data
    document = synthesize_versioned_document(latest_version, referenced_version, resource_def)
    return document
def get_stage_documents(self, stage_id):
    """Return archive items whose task is assigned to the given stage.

    :param stage_id: id of the stage to filter on
    :return: cursor over matching ``archive`` documents
    """
    stage_term = {'term': {'task.stage': stage_id}}
    request = ParsedRequest()
    request.args = {'filter': superdesk.json.dumps(stage_term)}
    archive_service = superdesk.get_resource_service('archive')
    return archive_service.get(request, None)
def get_applied_updates(self):
    """Return all applied data updates as a tuple, sorted by name descending."""
    request = ParsedRequest()
    request.sort = '-name'
    applied = self.data_updates_service.get(req=request, lookup={})
    return tuple(applied)
def get(self, req, lookup):
    """Fetch documents; when ``is_global`` is requested, restrict the lookup
    to global items only.

    :param req: parsed request (``None`` is replaced with an empty request)
    :param lookup: mongo lookup dict
    """
    if req is None:
        req = ParsedRequest()
    wants_global = bool(req.args and req.args.get('is_global'))
    if wants_global:
        lookup = {'is_global': True}
    return self.backend.get(self.datasource, req=req, lookup=lookup)
def get_expired_items(self, page_size):
    """Get expired items that are not moved to legal.

    Generator yielding batches (lists) of expired ``published`` items that
    have not yet been moved to the legal archive, ordered by
    ``publish_sequence_no``. Each batch holds at most ``page_size`` items.

    :param page_size: maximum number of items per yielded batch
    """
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'and': [
                        {'range': {'expiry': {'lt': 'now'}}},
                        {'term': {'moved_to_legal': False}},
                        {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                    ]
                }
            }
        }
    }

    service = get_resource_service('published')
    req = ParsedRequest()
    req.args = {'source': json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        sequence_no = cursor[0]['publish_sequence_no']
    logger.info(
        'Number of items to move to legal archive: {}, pages={}'.format(
            count, no_of_pages))

    for page in range(0, no_of_pages):
        logger.info('Fetching published items '
                    'for page number: {} sequence no: {}'.format(
                        (page + 1), sequence_no))
        req = ParsedRequest()
        page_query = deepcopy(query)
        # First page includes the starting sequence number ('gte'); later
        # pages continue strictly after the last item seen ('gt'). The
        # original code duplicated the 'gte' filter in a dead assignment
        # plus an identical `if page == 0` branch — collapsed here.
        comparison = 'gte' if page == 0 else 'gt'
        sequence_filter = {
            'range': {
                'publish_sequence_no': {comparison: sequence_no}
            }
        }
        page_query['query']['filtered']['filter']['and'].append(
            sequence_filter)
        req.args = {'source': json.dumps(page_query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items):
            # remember where this batch ended so the next page resumes there
            sequence_no = items[-1]['publish_sequence_no']
            logger.info('Fetched No. of Items: {} for page: {} '
                        'For import into legal archive.'.format(
                            len(items), (page + 1)))
            yield items
def test_import_into_legal_archive(self):
    """Items are imported to legal archive only after they expire, and the
    published/publish-queue records are flagged as moved to legal."""
    archive_publish = get_resource_service('archive_publish')
    archive_correct = get_resource_service('archive_correct')
    legal_archive = get_resource_service('legal_archive')
    # NOTE(review): 'archive' is bound to the 'archive_publish' service, not
    # 'archive' — confirm this is intentional (it is later used for
    # find_one/system_update on archive items).
    archive = get_resource_service('archive_publish')
    published = get_resource_service('published')
    publish_queue = get_resource_service('publish_queue')
    # disable the legal-archive upsert while publishing so nothing is
    # imported as a side effect of the patch calls below
    self.original_method = LegalArchiveImport.upsert_into_legal_archive
    LegalArchiveImport.upsert_into_legal_archive = MagicMock()
    for item in self.archive_items:
        archive_publish.patch(item['_id'], {
            'headline': 'publishing',
            'abstract': 'publishing'
        })
    # publishing alone must not import anything into legal
    for item in self.archive_items:
        legal_item = legal_archive.find_one(req=None, _id=item['_id'])
        self.assertIsNone(legal_item,
                          'Item: {} is not none.'.format(item['_id']))
    archive_correct.patch(self.archive_items[1]['_id'], {
        'headline': 'correcting',
        'abstract': 'correcting'
    })
    # restore the real upsert before running the command under test
    LegalArchiveImport.upsert_into_legal_archive = self.original_method
    self.class_under_test().run(1)
    # items are not expired
    for item in self.archive_items:
        legal_item = legal_archive.find_one(req=None, _id=item['_id'])
        self.assertIsNone(legal_item,
                          'Item: {} is not none.'.format(item['_id']))
    # expire the items
    for item in self.archive_items:
        original = archive.find_one(req=None, _id=item['_id'])
        archive.system_update(item['_id'],
                              {'expiry': utcnow() - timedelta(minutes=30)},
                              original)
        published.update_published_items(item['_id'], 'expiry',
                                         utcnow() - timedelta(minutes=30))
    # run the command after expiry
    self.class_under_test().run(1)
    # items are expired
    for item in self.archive_items:
        legal_item = legal_archive.find_one(req=None, _id=item['_id'])
        self.assertEqual(
            legal_item['_id'], item['_id'],
            'item {} not imported to legal'.format(item['_id']))
    # items are moved to legal
    for item in self.archive_items:
        published_items = list(
            published.get_other_published_items(item['_id']))
        for published_item in published_items:
            self.assertEqual(published_item['moved_to_legal'], True)
    # items are moved to legal publish queue
    for item in self.archive_items:
        req = ParsedRequest()
        req.where = json.dumps({'item_id': item['_id']})
        queue_items = list(publish_queue.get(req=req, lookup=None))
        self.assertGreaterEqual(len(queue_items), 1)
        for queue_item in queue_items:
            self.assertEqual(queue_item['moved_to_legal'], True)
def delete(resource, **lookup):
    """Deletes all item of a resource (collection in MongoDB terms). Won't
    drop indexes. Use with caution!

    .. versionchanged:: 0.5
       Return 204 NoContent instead of 200.

    .. versionchanged:: 0.4
       Support for document versioning.
       'on_delete_resource' raised before performing the actual delete.
       'on_deleted_resource' raised after performing the delete

    .. versionchanged:: 0.3
       Support for the lookup filter, which allows for deletion of
       sub-resources (only delete documents that match a given condition).

    .. versionchanged:: 0.0.4
       Added the ``requires_auth`` decorator.

    .. versionadded:: 0.0.2
    """
    resource_def = config.DOMAIN[resource]
    # fire pre-delete callbacks (generic and resource-specific)
    getattr(app, "on_delete_resource")(resource)
    getattr(app, "on_delete_resource_%s" % resource)()
    default_request = ParsedRequest()
    if resource_def["soft_delete"]:
        # get_document should always fetch soft deleted documents from the db
        # callers must handle soft deleted documents
        default_request.show_deleted = True
    result, _ = app.data.find(resource, default_request, lookup)
    originals = list(result)
    if not originals:
        return all_done()
    # I add new callback as I want the framework to be retro-compatible
    getattr(app, "on_delete_resource_originals")(resource, originals, lookup)
    getattr(app, "on_delete_resource_originals_%s" % resource)(originals,
                                                               lookup)
    id_field = resource_def["id_field"]

    if resource_def["soft_delete"]:
        # I need to check that I have at least some documents not soft_deleted
        # I skip all the soft_deleted documents
        originals = [x for x in originals if not x.get(config.DELETED)]
        if not originals:
            # Nothing to be deleted
            return all_done()
        # soft delete each document individually so the DELETED marker is set
        for document in originals:
            lookup[id_field] = document[id_field]
            deleteitem_internal(resource, concurrency_check=False,
                                suppress_callbacks=True,
                                original=document, **lookup)
    else:
        # TODO if the resource schema includes media files, these won't be
        # deleted by use of this global method (it should be disabled). Media
        # cleanup is handled at the item endpoint by the delete() method
        # (see above).
        app.data.remove(resource, lookup)

        # TODO: should attempt to delete version collection even if setting is
        # off
        if resource_def["versioning"] is True:
            app.data.remove(resource + config.VERSIONS, lookup)

    # fire post-delete callbacks (generic and resource-specific)
    getattr(app, "on_deleted_resource")(resource)
    getattr(app, "on_deleted_resource_%s" % resource)()

    return all_done()
def get(self, req, lookup):
    """Delegate to the parent service's ``get``, defaulting ``req`` to an
    empty :class:`ParsedRequest` when none is supplied."""
    request = req if req is not None else ParsedRequest()
    return super().get(request, lookup)
def _validate_associated_items(self, original_item, updates=None,
                               validation_errors=None):
    """Validates associated items.

    This function will ensure that the unpublished content validates and
    none of the content is locked, also do not allow any killed or recalled
    or spiked content.

    :param original_item: item whose associations (and, for composite items
        being published, package residrefs) are validated
    :param updates: pending updates; their associations are merged over the
        original's before validation
    :param validation_errors: validation errors are appended if there are
        any.
    """
    if validation_errors is None:
        validation_errors = []
    if updates is None:
        updates = {}

    # merge associations: pending updates win over the stored item
    associations = deepcopy(original_item.get(ASSOCIATIONS, {}))
    associations.update(updates.get(ASSOCIATIONS, {}))

    items = list(associations.values())
    if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
            self.publish_type == ITEM_PUBLISH:
        items.extend(self.package_service.get_residrefs(original_item))

    for item in items:
        # use isinstance instead of `type(item) == dict` (idiomatic, and
        # also covers dict subclasses)
        if isinstance(item, dict) and item.get(config.ID_FIELD):
            doc = item
            # enhance doc with lock_user
            req = ParsedRequest()
            req.args = {}
            req.projection = json.dumps({'lock_user': 1})
            try:
                doc.update({
                    'lock_user': super().find_one(
                        req=req, _id=item[config.ID_FIELD])['lock_user']
                })
            except (TypeError, KeyError):
                # item not found or has no lock_user — treat as unlocked
                pass
        elif item:
            doc = super().find_one(req=None, _id=item)
        else:
            continue

        if not doc:
            continue

        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            # recurse into nested packages
            self._validate_associated_items(
                doc, validation_errors=validation_errors)

        # make sure no items are killed or recalled or spiked or scheduled
        doc_item_state = doc.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
        if doc_item_state in {
            CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED,
            CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED
        }:
            validation_errors.append(
                'Item cannot contain associated {} item'.format(
                    doc[ITEM_STATE]))

        if doc.get(EMBARGO):
            validation_errors.append(
                'Item cannot have associated items with Embargo')

        # don't validate items that already have published
        if doc_item_state not in [
            CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED
        ]:
            validate_item = {
                'act': self.publish_type,
                'type': doc[ITEM_TYPE],
                'validate': doc
            }
            if isinstance(item, dict):
                validate_item['embedded'] = True
            errors = get_resource_service('validate').post([validate_item],
                                                           headline=True)
            if errors[0]:
                pre_errors = [
                    'Associated item %s %s' %
                    (doc.get('slugline', ''), error) for error in errors[0]
                ]
                validation_errors.extend(pre_errors)

        if config.PUBLISH_ASSOCIATED_ITEMS:
            # check the locks on the items
            if doc.get('lock_user'):
                if original_item['lock_user'] != doc['lock_user']:
                    validation_errors.extend([
                        '{}: {}'.format(
                            doc.get('headline', doc['_id']),
                            _('packaged item is locked by another user'))
                    ])
                elif original_item['lock_user'] == doc['lock_user']:
                    validation_errors.extend([
                        '{}: {}'.format(
                            doc.get('headline', doc['_id']),
                            _('packaged item is locked by you. Unlock it and try again'
                              ))
                    ])
def _remove_expired_items(self, expiry_datetime, lock_name):
    """Remove the expired items.

    :param datetime expiry_datetime: expiry datetime
    :param str log_msg: log message to be prefixed
    :param str lock_name: lock name to touch
    """
    logger.info('{} Starting to remove published expired items.'.format(
        self.log_msg))
    archive_service = get_resource_service(ARCHIVE)
    published_service = get_resource_service('published')
    items_to_remove = set()
    items_to_be_archived = dict()
    items_having_issues = dict()
    # desks configured to keep their published content out of removal
    preserve_published_desks = {
        desk.get(config.ID_FIELD): 1
        for desk in get_resource_service('desks').find(
            where={'preserve_published_content': True})
    }

    # process expired items in batches yielded by the archive service
    for expired_items in archive_service.get_expired_items(
            expiry_datetime):
        if len(expired_items) == 0:
            logger.info('{} No items found to expire.'.format(
                self.log_msg))
            return

        # refresh the lock; bail out if another worker took over
        if not touch(lock_name, expire=600):
            logger.warning(
                '{} lost lock while removing expired items.'.format(
                    self.log_msg))
            return

        # delete spiked items
        self.delete_spiked_items(expired_items)

        # get killed items
        killed_items = {
            item.get(config.ID_FIELD): item
            for item in expired_items
            if item.get(ITEM_STATE) in {CONTENT_STATE.KILLED,
                                        CONTENT_STATE.RECALLED}
        }

        # check if killed items imported to legal
        items_having_issues.update(
            self.check_if_items_imported_to_legal_archive(killed_items))

        # filter out the killed items not imported to legal.
        killed_items = {
            item_id: item
            for item_id, item in killed_items.items()
            if item_id not in items_having_issues
        }

        # Get the not killed and spiked items
        not_killed_items = {
            item.get(config.ID_FIELD): item
            for item in expired_items if item.get(ITEM_STATE) not in {
                CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED,
                CONTENT_STATE.RECALLED
            }
        }

        log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                         "'expired_on': {expiry}}}."

        # Processing items to expire
        for item_id, item in not_killed_items.items():
            # fill fields the log format string requires
            item.setdefault(config.VERSION, 1)
            item.setdefault('expiry', expiry_datetime)
            item.setdefault('unique_name', '')
            expiry_msg = log_msg_format.format(**item)
            logger.info('{} Processing expired item. {}'.format(
                self.log_msg, expiry_msg))

            processed_items = dict()
            if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                    self._can_remove_item(item, processed_items, preserve_published_desks):
                # item can be archived and removed from the database
                logger.info('{} Removing item. {}'.format(
                    self.log_msg, expiry_msg))
                logger.info('{} Items to be removed. {}'.format(
                    self.log_msg, processed_items))
                issues = self.check_if_items_imported_to_legal_archive(
                    processed_items)
                if issues:
                    items_having_issues.update(processed_items)
                else:
                    items_to_be_archived.update(processed_items)

        # all items to expire
        items_to_expire = deepcopy(items_to_be_archived)

        # check once again in items imported to legal
        items_having_issues.update(
            self.check_if_items_imported_to_legal_archive(items_to_expire))
        if items_having_issues:
            # remove items not imported to legal
            items_to_expire = {
                item_id: item
                for item_id, item in items_to_expire.items()
                if item_id not in items_having_issues
            }

            # remove items not imported to legal from archived items
            items_to_be_archived = {
                item_id: item
                for item_id, item in items_to_be_archived.items()
                if item_id not in items_having_issues
            }

        # items_to_be_archived might contain killed items
        for item_id, item in items_to_be_archived.items():
            if item.get(ITEM_STATE) in {
                CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED
            }:
                killed_items[item_id] = item

        # remove killed items from the items_to_be_archived
        items_to_be_archived = {
            item_id: item
            for item_id, item in items_to_be_archived.items()
            if item.get(ITEM_STATE) not in {CONTENT_STATE.KILLED,
                                            CONTENT_STATE.RECALLED}
        }

        # add killed items to items to expire
        items_to_expire.update(killed_items)

        # get the filter conditions
        logger.info('{} Loading filter conditions.'.format(self.log_msg))
        req = ParsedRequest()
        filter_conditions = list(
            get_resource_service('content_filters').get(
                req=req, lookup={'is_archived_filter': True}))

        # move to archived collection
        logger.info('{} Archiving items.'.format(self.log_msg))
        for _item_id, item in items_to_be_archived.items():
            self._move_to_archived(item, filter_conditions)

        for item_id, item in killed_items.items():
            # delete from the published collection and queue
            msg = log_msg_format.format(**item)
            try:
                published_service.delete_by_article_id(item_id)
                logger.info(
                    '{} Deleting killed item from published. {}'.format(
                        self.log_msg, msg))
                items_to_remove.add(item_id)
            except Exception:
                logger.exception(
                    '{} Failed to delete killed item from published. {}'.
                    format(self.log_msg, msg))

        if items_to_remove:
            logger.info('{} Deleting articles.: {}'.format(
                self.log_msg, items_to_remove))
            archive_service.delete_by_article_ids(list(items_to_remove))

        # flag items that could not be confirmed in the legal archive
        for _item_id, item in items_having_issues.items():
            msg = log_msg_format.format(**item)
            try:
                archive_service.system_update(item.get(config.ID_FIELD),
                                              {'expiry_status': 'invalid'},
                                              item)
                logger.info('{} Setting item expiry status. {}'.format(
                    self.log_msg, msg))
            except Exception:
                logger.exception(
                    '{} Failed to set expiry status for item. {}'.format(
                        self.log_msg, msg))

        logger.info('{} Deleting killed from archive.'.format(
            self.log_msg))
def find_one(self, resource, filter, projection):
    """Fetch a single document of ``resource`` matching ``filter``,
    limited to the fields in ``projection``."""
    request = ParsedRequest()
    request.args = {}
    request.projection = projection
    return self.data_layer.find_one(resource, request, **filter)