def test_files_dont_duplicate_ingest(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2014_newsml_KBN0FL0NM:10'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self._get_provider_service(provider)
    provider_service.provider = provider
    provider_service.URL = provider.get('config', {}).get('url')
    items = provider_service.fetch_ingest(guid)

    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    # ingest the items
    self.ingest_items(items, provider, provider_service)

    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    # ingest them again
    self.ingest_items(items, provider, provider_service)

    # 12 files in grid fs
    current_files = self.app.media.fs('upload').find()
    self.assertEqual(12, current_files.count())
def test_ingest_cancellation(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2016_newsml_L1N14N0FF:978556838'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self._get_provider_service(provider)
    provider_service.provider = provider
    provider_service.URL = provider.get('config', {}).get('url')
    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    self.ingest_items(items, provider, provider_service)

    guid = 'tag_reuters.com_2016_newsml_L1N14N0FF:1542761538'
    items = provider_service.fetch_ingest(guid)
    for item in items:
        item['ingest_provider'] = provider['_id']
        item['expiry'] = utcnow() + timedelta(hours=11)

    self.ingest_items(items, provider, provider_service)

    ingest_service = get_resource_service('ingest')
    lookup = {'uri': items[0].get('uri')}
    family_members = ingest_service.get_from_mongo(req=None, lookup=lookup)
    self.assertEqual(family_members.count(), 2)
    for relative in family_members:
        self.assertEqual(relative['pubstatus'], 'canceled')
        self.assertEqual(relative['state'], 'killed')
def test_get_article_ids(self):
    provider_name = 'reuters'
    provider, provider_service = self.setup_reuters_provider()
    ids = provider_service._get_article_ids('channel1', utcnow(), utcnow() + timedelta(minutes=-10))
    self.assertEqual(len(ids), 3)
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    self.assertEqual(provider['tokens']['poll_tokens']['channel1'], 'ExwaY31kfnR2Z2J1cWZ2YnxoYH9kfw==')
def test_ingest_update(self):
    provider_name = 'reuters'
    guid = 'tag_reuters.com_2014_newsml_KBN0FL0NN:5'
    provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
    provider_service = self._get_provider_service(provider)
    provider_service.provider = provider
    provider_service.URL = provider.get('config', {}).get('url')
    items = provider_service.fetch_ingest(guid)

    items[0]['ingest_provider'] = provider['_id']
    items[0]['expiry'] = utcnow() + timedelta(hours=11)

    self.ingest_items(items, provider, provider_service)

    self.assertEqual(items[0]['unique_id'], 1)
    original_id = items[0]['_id']

    items = provider_service.fetch_ingest(guid)
    items[0]['ingest_provider'] = provider['_id']
    items[0]['expiry'] = utcnow() + timedelta(hours=11)

    # change the headline
    items[0]['headline'] = 'Updated headline'

    # ingest the item again
    self.ingest_items(items, provider, provider_service)

    # check that the headline update and the unique_id survive
    elastic_item = self.app.data._search_backend('ingest').find_one('ingest', _id=original_id, req=None)
    self.assertEqual(elastic_item['headline'], 'Updated headline')
    self.assertEqual(elastic_item['unique_id'], 1)
    self.assertEqual(elastic_item['unique_name'], '#1')
def test_ingest_associated_item_renditions(self):
    provider = {'feeding_service': 'ninjs', '_id': self.providers['ninjs']}
    provider_service = FileFeedingService()
    item = {
        'guid': 'foo',
        'type': 'text',
        'versioncreated': utcnow(),
        'associations': {
            'featuremedia': {
                'guid': 'bar',
                'type': 'picture',
                'versioncreated': utcnow(),
                'renditions': {
                    'original': {
                        'href': 'https://farm8.staticflickr.com/7300/9203849352_297ea4207d_z_d.jpg',
                        'mimetype': 'image/jpeg',
                        'width': 640,
                        'height': 426,
                    }
                }
            }
        }
    }

    # avoid transfer_renditions call which would store the picture locally
    # and it would fetch it using superdesk url which doesn't work in test
    with patch('superdesk.io.commands.update_ingest.transfer_renditions'):
        status, ids = ingest.ingest_item(item, provider, provider_service)

    self.assertTrue(status)
    self.assertEqual(2, len(ids))
    self.assertIn('thumbnail', item['associations']['featuremedia']['renditions'])
def filter_expired_items(provider, items):
    try:
        days_to_keep_content = provider.get('days_to_keep', DAYS_TO_KEEP)
        expiration_date = utcnow() - timedelta(days=days_to_keep_content)
        return [item for item in items if item.get('versioncreated', utcnow()) > expiration_date]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
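# A minimal usage sketch for filter_expired_items above (hypothetical data, not
# part of the original module): items older than the provider's 'days_to_keep'
# window are dropped, and items missing 'versioncreated' default to utcnow()
# and are therefore kept.
def _example_filter_expired_items():
    provider = {'name': 'demo', 'days_to_keep': 2}
    items = [
        {'guid': 'fresh', 'versioncreated': utcnow()},
        {'guid': 'stale', 'versioncreated': utcnow() - timedelta(days=5)},
    ]
    assert [i['guid'] for i in filter_expired_items(provider, items)] == ['fresh']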
def on_update(self, updates, original):
    # in the case we have a comment
    if original['post_status'] == 'comment':
        original['blog'] = original['groups'][1]['refs'][0]['item']['client_blog']
        updates['blog'] = original['groups'][1]['refs'][0]['item']['client_blog']
        # if the length of the comment is not between 1 and 300 then we get an error
        check_comment_length(original['groups'][1]['refs'][0]['item']['text'])

    # check if the updates `content` is different from the original
    content_diff = False
    if not updates.get('groups', False):
        content_diff = False
    elif len(original['groups'][1]['refs']) != len(updates['groups'][1]['refs']):
        content_diff = True
    else:
        for index, val in enumerate(updates['groups'][1]['refs']):
            item = get_resource_service('archive').find_one(req=None, _id=val['residRef'])
            if item['text'] != original['groups'][1]['refs'][index]['item']['text']:
                content_diff = True
                break
    if content_diff:
        updates['content_updated_date'] = utcnow()

    # check permission
    post = original.copy()
    post.update(updates)
    self.check_post_permission(post)

    # when publishing, put the published item from drafts and contributions at the top of the timeline
    if updates.get('post_status') == 'open' and original.get('post_status') in ('draft', 'submitted', 'comment'):
        updates['order'] = self.get_next_order_sequence(original.get('blog'))
        # if you publish a post it will save a published date and register who did it
        updates['published_date'] = utcnow()
        updates['publisher'] = getattr(flask.g, 'user', None)

    # when unpublishing
    if original.get('post_status') == 'open' and updates.get('post_status') != 'open':
        updates['unpublished_date'] = utcnow()
    super().on_update(updates, original)
def test_ingest_update(self):
    provider, provider_service = self.setup_reuters_provider()
    items = provider_service.fetch_ingest(reuters_guid)

    items[0]['ingest_provider'] = provider['_id']
    items[0]['expiry'] = utcnow() + timedelta(hours=11)

    self.ingest_items(items, provider, provider_service)

    self.assertEqual(items[0]['unique_id'], 1)
    original_id = items[0]['_id']

    items = provider_service.fetch_ingest(reuters_guid)
    items[0]['ingest_provider'] = provider['_id']
    items[0]['expiry'] = utcnow() + timedelta(hours=11)

    # change the headline
    items[0]['headline'] = 'Updated headline'

    # ingest the item again
    self.ingest_items(items, provider, provider_service)

    # check that the headline update and the unique_id survive
    elastic_item = self.app.data._search_backend('ingest').find_one('ingest', _id=original_id, req=None)
    self.assertEqual(elastic_item['headline'], 'Updated headline')
    self.assertEqual(elastic_item['unique_id'], 1)
    self.assertEqual(elastic_item['unique_name'], '#1')
def test_locator(self):
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 's'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '15017000'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'preformatted',
        'body_html': 'The story body',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId(),
        'place': [{'qcode': 'VIC', 'name': 'VIC'}]
    }

    subscriber = self.app.data.find('subscribers', None, None)[0]
    f = AAPBulletinBuilderFormatter()
    seq, item = f.format(article, subscriber)[0]
    self.assertGreater(int(seq), 0)
    test_article = json.loads(item)
    self.assertEqual(test_article['headline'], 'This is a test headline')
    self.assertEqual(test_article['place'][0]['qcode'], 'CRIK')

    article['anpa_category'] = [{'qcode': 'a'}]
    article['place'] = [{'qcode': 'VIC', 'name': 'VIC'}]
    seq, item = f.format(article, subscriber)[0]
    self.assertGreater(int(seq), 0)
    test_article = json.loads(item)
    self.assertEqual(test_article['headline'], 'This is a test headline')
    self.assertEqual(test_article['place'][0]['qcode'], 'VIC')
def test_new_zealand_content_with_source_NZN(self):
    article = {
        'source': 'NZN',
        'anpa_category': [{'qcode': 's'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '15017000'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'format': 'preserved',
        'body_html': 'The story body',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId(),
        'task': {
            'desk': self.desks[1][config.ID_FIELD]
        }
    }

    subscriber = self.app.data.find('subscribers', None, None)[0]
    seq, item = self._formatter.format(article, subscriber)[0]
    item = json.loads(item)
    self.assertGreater(int(seq), 0)
    test_article = json.loads(item.get('data'))
    self.assertEqual(test_article['source'], 'NZN')
def setUp(self):
    try:
        from superdesk.publish.publish_content import get_queue_items
    except ImportError:
        self.fail("Could not import function under test (get_queue_items).")
    else:
        self.func_under_test = get_queue_items

    self.queue_items = [
        {'_id': ObjectId(), 'state': 'pending', 'item_id': 'item_1', 'item_version': 4,
         'headline': 'pending headline', 'destination': {}},
        {'_id': ObjectId(), 'state': 'retrying', 'item_id': 'item_2', 'item_version': 4,
         'headline': 'retrying headline', 'retry_attempt': 2,
         'next_retry_attempt_at': utcnow() + timedelta(minutes=30)},
        {'_id': ObjectId(), 'state': 'success', 'item_id': 'item_3', 'item_version': 4,
         'headline': 'success headline', 'retry_attempt': 4,
         'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
        {'_id': ObjectId(), 'state': 'failed', 'item_id': 'item_4', 'item_version': 4,
         'headline': 'failed headline', 'retry_attempt': 10,
         'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
        {'_id': ObjectId(), 'state': 'canceled', 'item_id': 'item_5', 'item_version': 4,
         'headline': 'canceled headline', 'retry_attempt': 4,
         'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
        {'_id': ObjectId(), 'state': 'retrying', 'item_id': 'item_6', 'item_version': 4,
         'headline': 'retrying headline', 'retry_attempt': 2,
         'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
    ]
    self.app.data.insert('publish_queue', self.queue_items)
def test_single_category_allow_features(self):
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'c'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '15017000'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'format': 'HTML',
        'body_html': 'The story body',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId(),
        'task': {
            'desk': self.desks[0][config.ID_FIELD]
        }
    }

    subscriber = self.app.data.find('subscribers', None, None)[0]
    seq, item = self._formatter.format(article, subscriber)[0]
    item = json.loads(item)
    self.assertGreater(int(seq), 0)
    test_article = json.loads(item.get('data'))
    self.assertEqual(test_article['source'], 'AAP')
    self.assertEqual(test_article['first_category']['qcode'], 'c')
    self.assertEqual(len(test_article['anpa_category']), 1)
    self.assertEqual(test_article['anpa_category'][0]['qcode'], 'c')
def test_bulletin_builder_formatter(self):
    article = {
        config.ID_FIELD: '123',
        config.VERSION: 2,
        'source': 'AAP',
        'anpa_category': [{'qcode': 'a'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '02011001'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'preformatted',
        'body_html': 'The story body',
        'abstract': 'abstract',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId()
    }

    subscriber = self.app.data.find('subscribers', None, None)[0]
    seq, item = self._formatter.format(article, subscriber)[0]
    item = json.loads(item)
    self.assertGreater(int(seq), 0)
    self.assertEqual(article[config.ID_FIELD], item.get('id'))
    self.assertEqual(article[config.VERSION], item.get('version'))
    self.assertEqual(article[ITEM_TYPE], item.get(ITEM_TYPE))
    self.assertEqual(article.get(PACKAGE_TYPE, ''), item.get(PACKAGE_TYPE))
    self.assertEqual(article['headline'], item.get('headline'))
    self.assertEqual(article['slugline'], item.get('slugline'))
    formatted_item = json.loads(item.get('data'))
    self.assertEqual(article['headline'], formatted_item['headline'])
def test_auto_publish_without_abstract_other_source(self):
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'c'}],
        'headline': 'This is a test headline',
        'auto_publish': True,
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '15017000'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'text',
        'format': 'HTML',
        'body_html': 'Sydney, AAP - The story body text.',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId(),
        'task': {
            'desk': self.desks[0][config.ID_FIELD]
        }
    }

    subscriber = self.app.data.find('subscribers', None, None)[0]
    seq, item = self._formatter.format(article, subscriber)[0]
    item = json.loads(item)
    self.assertGreater(int(seq), 0)
    test_article = json.loads(item.get('data'))
    self.assertEqual(test_article['source'], 'AAP')
    self.assertEqual(test_article['abstract'], 'This is a test headline')
    self.assertEqual(test_article['slugline'], 'slugline')
    self.assertEqual(test_article['body_text'], 'Sydney, AAP - The story body text.')
def test_body_footer(self):
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 's'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '15017000'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'preformatted',
        'body_html': 'The story body',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId(),
        'body_footer': 'call helpline 999 if you are planning to quit smoking'
    }

    subscriber = self.app.data.find('subscribers', None, None)[0]
    seq, item = self._formatter.format(article, subscriber)[0]
    formatted_article = json.loads(item)
    self.assertEqual(formatted_article['body_text'],
                     'The story body call helpline 999 if you are planning to quit smoking')
def update_provider(provider, rule_set=None, routing_scheme=None):
    """Fetches items from ingest provider as per the configuration, ingests them into Superdesk
    and updates the provider.
    """
    if ingest_for_provider_is_already_running(provider):
        return

    try:
        update = {
            LAST_UPDATED: utcnow()
        }

        for items in providers[provider.get('type')].update(provider):
            ingest_items(items, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()

        ingest_service = superdesk.get_resource_service('ingest_providers')
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=ingest_service._get_administrators(),
                name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))
        push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        mark_provider_as_not_running(provider)
def test_bulletin_builder_formatter(self):
    article = {
        'source': 'AAP',
        'anpa_category': [{'qcode': 'a'}],
        'headline': 'This is a test headline',
        'byline': 'joe',
        'slugline': 'slugline',
        'subject': [{'qcode': '02011001'}],
        'anpa_take_key': 'take_key',
        'unique_id': '1',
        'type': 'preformatted',
        'body_html': 'The story body',
        'word_count': '1',
        'priority': '1',
        'firstcreated': utcnow(),
        'versioncreated': utcnow(),
        'lock_user': ObjectId()
    }

    with self.app.app_context():
        subscriber = self.app.data.find('subscribers', None, None)[0]
        f = AAPBulletinBuilderFormatter()
        seq, item = f.format(article, subscriber)[0]
        self.assertGreater(int(seq), 0)
        self.assertEqual(json.dumps(article, default=json_serialize_datetime_objectId), item)
def setUp(self):
    self.req = ParsedRequest()
    with self.app.test_request_context(self.app.config.get('URL_PREFIX')):
        self.articles = [
            {'_id': '1', 'urgency': 1, 'headline': 'story', 'state': 'fetched'},
            {'_id': '2', 'headline': 'prtorque', 'state': 'fetched'},
            {'_id': '3', 'urgency': 3, 'state': 'fetched', 'flags': {'marked_for_sms': True}},
            {'_id': '4', 'urgency': 4, 'state': 'fetched', 'task': {'desk': '1'}, 'ingest_provider': '1'},
            {'_id': '5', 'urgency': 2, 'state': 'fetched', 'task': {'desk': '2'}, 'priority': 3},
            {'_id': '6', 'state': 'fetched', 'embargo': utcnow(),
             'schedule_settings': {'utc_embargo': utcnow() + timedelta(minutes=20)}},
            {'_id': '7', 'genre': [{'name': 'Sidebar'}], 'state': 'fetched'},
            {'_id': '8', 'subject': [{'name': 'adult education', 'qcode': '05001000', 'parent': '05000000'},
                                     {'name': 'high schools', 'qcode': '05005003', 'parent': '05005000'}],
             'state': 'fetched'},
            {'_id': '9', 'state': 'fetched', 'anpa_category': [{'qcode': 'a', 'name': 'Aus News'}]},
            {'_id': '10', 'body_html': '<p>Mention<p>', 'embargo': utcnow(),
             'schedule_settings': {'utc_embargo': utcnow() - timedelta(minutes=20)}},
            {'_id': '11', 'place': [{'qcode': 'NSW', 'name': 'NSW'}], 'state': 'fetched'},
        ]

        self.app.data.insert('archive', self.articles)

        self.app.data.insert('filter_conditions',
                             [{'_id': 1, 'field': 'headline', 'operator': 'like', 'value': 'tor', 'name': 'test-1'}])
        self.app.data.insert('filter_conditions',
                             [{'_id': 2, 'field': 'urgency', 'operator': 'in', 'value': '2', 'name': 'test-2'}])
        self.app.data.insert('filter_conditions',
                             [{'_id': 3, 'field': 'urgency', 'operator': 'in', 'value': '3,4,5', 'name': 'test-2'}])
        self.app.data.insert('filter_conditions',
                             [{'_id': 4, 'field': 'urgency', 'operator': 'nin', 'value': '1,2,3', 'name': 'test-2'}])
        self.app.data.insert('filter_conditions',
                             [{'_id': 5, 'field': 'urgency', 'operator': 'in', 'value': '2,5', 'name': 'test-2'}])
        self.app.data.insert('content_filters',
                             [{"_id": 1, "content_filter": [{"expression": {"fc": [1]}}], "name": "soccer-only"}])
def transmit_subscriber_items(self, queue_items, subscriber):
    # Attempt to obtain a lock for transmissions to the subscriber
    lock_name = get_lock_id("Subscriber", "Transmit", subscriber)
    if not lock(lock_name, expire=610):
        return

    for queue_item in queue_items:
        publish_queue_service = get_resource_service(PUBLISH_QUEUE)
        log_msg = (
            "_id: {_id} item_id: {item_id} state: {state} "
            "item_version: {item_version} headline: {headline}".format(**queue_item)
        )
        try:
            # check the status of the queue item
            queue_item = publish_queue_service.find_one(req=None, _id=queue_item[config.ID_FIELD])
            if queue_item.get("state") not in [QueueState.PENDING.value, QueueState.RETRYING.value]:
                logger.info(
                    "Transmit State is not pending/retrying for queue item: {}. It is in {}".format(
                        queue_item.get(config.ID_FIELD), queue_item.get("state")
                    )
                )
                continue

            # update the status of the item to in-progress
            queue_update = {"state": "in-progress", "transmit_started_at": utcnow()}
            publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)
            logger.info("Transmitting queue item {}".format(log_msg))

            destination = queue_item["destination"]
            transmitter = superdesk.publish.registered_transmitters[destination.get("delivery_type")]
            transmitter.transmit(queue_item)
            logger.info("Transmitted queue item {}".format(log_msg))
        except Exception as e:
            logger.exception("Failed to transmit queue item {}".format(log_msg))

            max_retry_attempt = app.config.get("MAX_TRANSMIT_RETRY_ATTEMPT")
            retry_attempt_delay = app.config.get("TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES")
            try:
                orig_item = publish_queue_service.find_one(req=None, _id=queue_item["_id"])
                updates = {config.LAST_UPDATED: utcnow()}
                if orig_item.get("retry_attempt", 0) < max_retry_attempt and not isinstance(
                    e, PublishHTTPPushClientError
                ):
                    updates["retry_attempt"] = orig_item.get("retry_attempt", 0) + 1
                    updates["state"] = QueueState.RETRYING.value
                    updates["next_retry_attempt_at"] = utcnow() + timedelta(minutes=retry_attempt_delay)
                else:
                    # all retry attempts exhausted, mark the item as failed
                    updates["state"] = QueueState.FAILED.value
                publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
            except Exception:
                logger.error("Failed to set the state for failed publish queue item {}.".format(queue_item["_id"]))

    # Release the lock for the subscriber
    unlock(lock_name)
def update_times(self, doc):
    task = doc.get('task', {})
    status = task.get('status', None)
    if status == 'in_progress':
        task.setdefault('started_at', utcnow())
    if status == 'done':
        task.setdefault('finished_at', utcnow())
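# A minimal sketch of how update_times behaves (hypothetical doc and service,
# not part of the original module): setdefault means an existing timestamp
# survives repeated status updates.
def _example_update_times(service):
    doc = {'task': {'status': 'in_progress'}}
    service.update_times(doc)  # sets doc['task']['started_at'] to utcnow()
    doc['task']['status'] = 'done'
    service.update_times(doc)  # additionally sets 'finished_at'; 'started_at' is preserved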
def test_remove_published_and_killed_content_separately(self):
    doc = self.articles[0]
    original = doc.copy()

    updates = {'targeted_for': [{'name': 'New South Wales', 'allow': True}]}
    get_resource_service(ARCHIVE).patch(id=original[config.ID_FIELD], updates=updates)

    original.update(updates)
    self._create_and_insert_into_versions(original, False)

    published_version_number = original[config.VERSION] + 1
    get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                         config.VERSION: published_version_number})

    published_service = get_resource_service(PUBLISHED)
    published_items = published_service.get(req=None, lookup=None)
    self.assertEqual(1, published_items.count())

    article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
    self.assertIsNotNone(article_in_production)
    self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.PUBLISHED)
    self.assertEqual(article_in_production[config.VERSION], published_version_number)
    insert_into_versions(doc=article_in_production)

    # Setting the expiry date of the published article to 1 hr back from now
    published_service.update_published_items(
        original[config.ID_FIELD], 'expiry', utcnow() + timedelta(minutes=-60))

    # Killing the published article and inserting into archive_versions as unittests use service directly
    published_version_number += 1
    get_resource_service(ARCHIVE_KILL).patch(id=doc[config.ID_FIELD],
                                             updates={ITEM_STATE: CONTENT_STATE.KILLED,
                                                      config.VERSION: published_version_number})

    # Executing the Expiry Job for the Published Article and asserting the collections
    RemoveExpiredPublishContent().run()

    published_items = published_service.get(req=None, lookup=None)
    self.assertEqual(1, published_items.count())

    article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
    self.assertIsNotNone(article_in_production)
    self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.KILLED)
    self.assertEqual(article_in_production[config.VERSION], published_version_number)
    insert_into_versions(doc=article_in_production)

    # Setting the expiry date of the killed article to 1 hr back from now and running the job again
    published_service.update_published_items(
        original[config.ID_FIELD], 'expiry', utcnow() + timedelta(minutes=-60))
    RemoveExpiredPublishContent().run()

    published_items = published_service.get_other_published_items(str(original[config.ID_FIELD]))
    self.assertEqual(0, published_items.count())

    article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
    self.assertIsNone(article_in_production)
def run(self, provider=None):
    if provider:
        data = superdesk.json.loads(provider)
        data.setdefault('_created', utcnow())
        data.setdefault('_updated', utcnow())
        data.setdefault('name', data['type'])
        db = superdesk.get_db()
        db['ingest_providers'].save(data)
        return data
def on_update(self, updates, original):
    # put the published item from drafts at the top of the timeline
    if updates.get('post_status') == 'open' and original.get('post_status') == 'draft':
        updates['order'] = self.get_next_order_sequence()
        # if you publish a post from a draft, only then will it have a published_date assigned
        updates['published_date'] = utcnow()
    if original.get('post_status') == 'open' and updates.get('post_status') == 'draft':
        updates['unpublished_date'] = utcnow()
    super().on_update(updates, original)
def enqueue_item(published_item):
    """Creates the corresponding entries in the publish queue for the given item"""
    published_item_id = ObjectId(published_item[config.ID_FIELD])
    published_service = get_resource_service(PUBLISHED)
    archive_service = get_resource_service(ARCHIVE)
    published_update = {QUEUE_STATE: PUBLISH_STATE.IN_PROGRESS, 'last_queue_event': utcnow()}
    try:
        logger.info('Queueing item with id: {} and item_id: {}'.format(published_item_id, published_item['item_id']))

        published_item = published_service.find_one(req=None, _id=published_item_id)
        if published_item.get(QUEUE_STATE) != PUBLISH_STATE.PENDING:
            logger.info('Queue State is not pending for published item {}. It is in {}'.
                        format(published_item_id, published_item.get(QUEUE_STATE)))
            return

        if published_item.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
            # if scheduled then change the state to published
            # change the `version` and `versioncreated` for the item
            # in archive collection and published collection.
            versioncreated = utcnow()
            item_updates = {'versioncreated': versioncreated, ITEM_STATE: CONTENT_STATE.PUBLISHED}
            resolve_document_version(document=item_updates, resource=ARCHIVE, method='PATCH',
                                     latest_doc={config.VERSION: published_item[config.VERSION]})

            # update the archive collection
            archive_item = archive_service.find_one(req=None, _id=published_item['item_id'])
            archive_service.system_update(published_item['item_id'], item_updates, archive_item)
            # insert into version.
            insert_into_versions(published_item['item_id'], doc=None)
            # import to legal archive
            import_into_legal_archive.apply_async(countdown=3, kwargs={'item_id': published_item['item_id']})
            logger.info('Modified the version of scheduled item: {}'.format(published_item_id))

            logger.info('Publishing scheduled item_id: {}'.format(published_item_id))
            # update the published collection
            published_update.update(item_updates)
            published_item.update({'versioncreated': versioncreated,
                                   ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                   config.VERSION: item_updates[config.VERSION]})

        published_service.patch(published_item_id, published_update)

        queued = get_enqueue_service(published_item[ITEM_OPERATION]).enqueue_item(published_item)

        # if the item is queued in the publish_queue then the state is "queued"
        # else the queue state is "queued_not_transmitted"
        queue_state = PUBLISH_STATE.QUEUED if queued else PUBLISH_STATE.QUEUED_NOT_TRANSMITTED
        published_service.patch(published_item_id, {QUEUE_STATE: queue_state})
        logger.info('Queued item with id: {} and item_id: {}'.format(published_item_id, published_item['item_id']))
    except KeyError:
        published_service.patch(published_item_id, {QUEUE_STATE: PUBLISH_STATE.PENDING})
        logger.exception('No enqueue service found for operation %s', published_item[ITEM_OPERATION])
    except Exception:
        published_service.patch(published_item_id, {QUEUE_STATE: PUBLISH_STATE.PENDING})
        raise
def test_import_into_legal_archive(self):
    archive_publish = get_resource_service('archive_publish')
    archive_correct = get_resource_service('archive_correct')
    legal_archive = get_resource_service('legal_archive')
    archive = get_resource_service('archive_publish')
    published = get_resource_service('published')
    publish_queue = get_resource_service('publish_queue')

    self.original_method = LegalArchiveImport.upsert_into_legal_archive
    LegalArchiveImport.upsert_into_legal_archive = MagicMock()

    for item in self.archive_items:
        archive_publish.patch(item['_id'], {'headline': 'publishing', 'abstract': 'publishing'})

    for item in self.archive_items:
        legal_item = legal_archive.find_one(req=None, _id=item['_id'])
        self.assertIsNone(legal_item, 'Item: {} is not none.'.format(item['_id']))

    archive_correct.patch(self.archive_items[1]['_id'], {'headline': 'correcting', 'abstract': 'correcting'})

    LegalArchiveImport.upsert_into_legal_archive = self.original_method
    self.class_under_test().run(1)

    # items are not expired
    for item in self.archive_items:
        legal_item = legal_archive.find_one(req=None, _id=item['_id'])
        self.assertIsNone(legal_item, 'Item: {} is not none.'.format(item['_id']))

    # expire the items
    for item in self.archive_items:
        original = archive.find_one(req=None, _id=item['_id'])
        archive.system_update(item['_id'], {'expiry': utcnow() - timedelta(minutes=30)}, original)
        published.update_published_items(item['_id'], 'expiry', utcnow() - timedelta(minutes=30))

    # run the command after expiry
    self.class_under_test().run(1)

    # items are expired
    for item in self.archive_items:
        legal_item = legal_archive.find_one(req=None, _id=item['_id'])
        self.assertEqual(legal_item['_id'], item['_id'], 'item {} not imported to legal'.format(item['_id']))

    # items are moved to legal
    for item in self.archive_items:
        published_items = list(published.get_other_published_items(item['_id']))
        for published_item in published_items:
            self.assertEqual(published_item['moved_to_legal'], True)

    # items are moved to legal publish queue
    for item in self.archive_items:
        req = ParsedRequest()
        req.where = json.dumps({'item_id': item['_id']})
        queue_items = list(publish_queue.get(req=req, lookup=None))
        self.assertGreaterEqual(len(queue_items), 1)
        for queue_item in queue_items:
            self.assertEqual(queue_item['moved_to_legal'], True)
def setUp(self):
    init_app(self.app)
    self.incident[0]['start_date'] = utcnow() - timedelta(hours=10)
    self.incident[0]['end_date'] = utcnow() + timedelta(hours=100)
    self.incident[1]['start_date'] = utcnow() - timedelta(hours=10)
    self.incident[1]['end_date'] = utcnow() + timedelta(hours=100)
    self.app.data.insert('traffic_incidents', self.incident)
    self.app.data.insert('archive', [{'_id': 1}])
    self.app.config['INIT_DATA_PATH'] = os.path.abspath(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../data'))
def test_utcnow(self):
    self.assertIsInstance(utcnow(), datetime)
    date1 = get_date(datetime.now(tz=utc))
    date2 = utcnow()
    self.assertEqual(date1.year, date2.year)
    self.assertEqual(date1.month, date2.month)
    self.assertEqual(date1.day, date2.day)
    self.assertEqual(date1.hour, date2.hour)
    self.assertEqual(date1.minute, date2.minute)
    self.assertEqual(date1.second, date2.second)
def on_create(self, docs):
    # the same content can be published more than once
    # so it is necessary to have a new _id and preserve the original
    for doc in docs:
        doc['item_id'] = doc['_id']
        doc['_created'] = utcnow()
        doc['versioncreated'] = utcnow()
        doc.pop('_id', None)
        doc.pop('lock_user', None)
        doc.pop('lock_time', None)
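# A minimal sketch of the on_create hook above (hypothetical doc and service,
# not part of the original module): the original _id is preserved in item_id
# so the same content can be published again under a fresh _id, and lock
# fields are dropped.
def _example_on_create(service):
    docs = [{'_id': 'abc123', 'lock_user': 'u1'}]
    service.on_create(docs)
    assert docs[0]['item_id'] == 'abc123'
    assert '_id' not in docs[0] and 'lock_user' not in docs[0]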
def _set_provider_status(self, doc, message=''):
    user = getattr(g, 'user', None)
    if doc.get('is_closed', True):
        doc['last_closed'] = doc.get('last_closed', {})
        doc['last_closed']['closed_at'] = utcnow()
        doc['last_closed']['closed_by'] = user['_id'] if user else None
        doc['last_closed']['message'] = message
    else:
        doc['last_opened'] = doc.get('last_opened', {})
        doc['last_opened']['opened_at'] = utcnow()
        doc['last_opened']['opened_by'] = user['_id'] if user else None
def test_get_utc_schedule(self):
    embargo_date = utcnow() + timedelta(minutes=10)
    content = {'embargo': embargo_date}
    utc_schedule = get_utc_schedule(content, 'embargo')
    self.assertEqual(utc_schedule, embargo_date)
def get_date():
    return utcnow()
def date_header(datetime):
    return format_datetime(parse_date(datetime if datetime else utcnow()), 'EEEE, MMMM d, yyyy')
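# A minimal usage sketch for the date_header filter above (hedged: assumes the
# module imports a Babel-style format_datetime and a parse_date helper that
# accepts both ISO strings and datetime objects, and an en_US default locale).
def _example_date_header():
    assert date_header('2014-07-16T00:00:00+0000') == 'Wednesday, July 16, 2014'
    today_header = date_header(None)  # falls back to utcnow(), e.g. 'Monday, January 1, 2024'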
def generate_text_item(items, template_name, resource_type):
    template = get_resource_service('planning_export_templates').get_export_template(template_name, resource_type)
    archive_service = get_resource_service('archive')
    if not template:
        raise SuperdeskApiError.badRequestError('Invalid template selected')

    for item in items:
        # Create list of assignee with preference to coverage_provider, if not, assigned user
        item['published_archive_items'] = []
        item['assignees'] = []
        item['text_assignees'] = []
        item['contacts'] = []
        text_users = []
        text_desks = []
        users = []
        desks = []

        def enhance_coverage(planning, item, users):
            for c in (planning.get('coverages') or []):
                is_text = c.get('planning', {}).get('g2_content_type', '') == 'text'
                completed = (c.get('assigned_to') or {}).get('state') == ASSIGNMENT_WORKFLOW_STATE.COMPLETED
                assigned_to = c.get('assigned_to') or {}
                user = None
                desk = None
                if assigned_to.get('coverage_provider'):
                    item['assignees'].append(assigned_to['coverage_provider']['name'])
                    if is_text and not completed:
                        item['text_assignees'].append(assigned_to['coverage_provider']['name'])
                elif assigned_to.get('user'):
                    user = assigned_to['user']
                    users.append(user)
                elif assigned_to.get('desk'):
                    desk = assigned_to.get('desk')
                    desks.append(desk)

                # Get abstract from related text item if coverage is 'complete'
                if is_text:
                    if completed:
                        results = list(archive_service.get_from_mongo(req=None, lookup={
                            'assignment_id': ObjectId(c['assigned_to']['assignment_id']),
                            'state': {'$in': ['published', 'corrected']},
                            'pubstatus': 'usable',
                            'rewrite_of': None
                        }))
                        if len(results) > 0:
                            item['published_archive_items'].append({
                                'archive_text': get_first_paragraph_text(results[0].get('abstract')) or '',
                                'archive_slugline': results[0].get('slugline') or ''
                            })
                    elif c.get('news_coverage_status', {}).get('qcode') == 'ncostat:int':
                        if user:
                            text_users.append(user)
                        else:
                            text_desks.append(desk)

        item['contacts'] = get_contacts_from_item(item)

        if resource_type == 'planning':
            enhance_coverage(item, item, users)
        else:
            for p in (item.get('plannings') or []):
                enhance_coverage(p, item, users)

        users = get_resource_service('users').find(where={
            '_id': {'$in': users}
        })
        desks = get_resource_service('desks').find(where={
            '_id': {'$in': desks}
        })

        for u in users:
            name = "{0} {1}".format(u.get('last_name'), u.get('first_name'))
            item['assignees'].append(name)
            if str(u['_id']) in text_users:
                item['text_assignees'].append(name)

        for d in desks:
            item['assignees'].append(d['name'])
            if str(d['_id']) in text_desks:
                item['text_assignees'].append(d['name'])

        set_item_place(item)

        item['description_text'] = item.get('description_text') or (item.get('event') or {}).get('definition_short')
        item['slugline'] = item.get('slugline') or (item.get('event') or {}).get('name')

        # Handle dates and remote time-zones
        if item.get('dates') or (item.get('event') or {}).get('dates'):
            dates = item.get('dates') or item.get('event').get('dates')
            item['schedule'] = utc_to_local(config.DEFAULT_TIMEZONE, dates.get('start'))
            if get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) != \
                    get_timezone_offset(dates.get('tz'), utcnow()):
                item['schedule'] = "{} ({})".format(item['schedule'].strftime('%H%M'), item['schedule'].tzname())
            else:
                item['schedule'] = item['schedule'].strftime('%H%M')

    agendas = []
    if resource_type == 'planning':
        agendas = group_items_by_agenda(items)
        inject_internal_converages(items)

        labels = {}
        cv = get_resource_service('vocabularies').find_one(req=None, _id='g2_content_type')
        if cv:
            labels = {_type['qcode']: _type['name'] for _type in cv['items']}

        for item in items:
            item['coverages'] = [labels.get(coverage.get('planning').get('g2_content_type'),
                                            coverage.get('planning').get('g2_content_type')) +
                                 (' (cancelled)' if coverage.get('workflow_status', '') == 'cancelled' else '')
                                 for coverage in item.get('coverages', [])
                                 if (coverage.get('planning') or {}).get('g2_content_type')]

    article = {}
    for key, value in template.items():
        if value.endswith(".html"):
            article[key.replace('_template', '')] = render_template(value, items=items, agendas=agendas)
        else:
            article[key] = render_template_string(value, items=items, agendas=agendas)

    return article
def set_item_expiry(doc):
    expiry_minutes = app.settings.get('PLANNING_EXPIRY_MINUTES', None)
    if expiry_minutes is not None:
        doc[ITEM_EXPIRY] = utcnow() + timedelta(minutes=expiry_minutes)
    else:
        doc[ITEM_EXPIRY] = None
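# A minimal sketch of set_item_expiry above (hedged: assumes 'app' is the
# Flask app as imported by the module; the settings value is hypothetical):
# with PLANNING_EXPIRY_MINUTES set the item gets a future expiry, with the
# setting absent or None the expiry is cleared.
def _example_set_item_expiry():
    app.settings['PLANNING_EXPIRY_MINUTES'] = 60
    doc = {}
    set_item_expiry(doc)  # doc[ITEM_EXPIRY] is roughly utcnow() + timedelta(minutes=60)
    app.settings['PLANNING_EXPIRY_MINUTES'] = None
    set_item_expiry(doc)  # doc[ITEM_EXPIRY] is None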
def setUp(self):
    try:
        from apps.legal_archive.commands import ImportLegalArchiveCommand
    except ImportError:
        self.fail("Could not import class under test (ImportLegalArchiveCommand).")
    else:
        self.class_under_test = ImportLegalArchiveCommand

    self.app.data.insert("desks", self.desks)
    self.app.data.insert("users", self.users)
    self.validators = [
        {"schema": {}, "type": "text", "act": "publish", "_id": "publish_text"},
        {"schema": {}, "type": "text", "act": "correct", "_id": "correct_text"},
        {"schema": {}, "type": "text", "act": "kill", "_id": "kill_text"},
    ]
    self.products = [
        {"_id": "1", "name": "prod1"},
        {"_id": "2", "name": "prod2", "codes": "abc,def"},
        {"_id": "3", "name": "prod3", "codes": "xyz"},
    ]
    self.subscribers = [
        {
            "name": "Test",
            "is_active": True,
            "subscriber_type": "wire",
            "email": "*****@*****.**",
            "sequence_num_settings": {"max": 9999, "min": 1},
            "products": ["1"],
            "destinations": [
                {
                    "name": "test",
                    "delivery_type": "email",
                    "format": "nitf",
                    "config": {"recipients": "*****@*****.**"},
                }
            ],
        }
    ]
    self.app.data.insert("validators", self.validators)
    self.app.data.insert("products", self.products)
    self.app.data.insert("subscribers", self.subscribers)
    self.class_under_test = ImportLegalArchiveCommand
    self.archive_items = [
        {
            "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
            "_id": "item1",
            "state": "in_progress",
            "headline": "item 1",
            "type": "text",
            "slugline": "item 1 slugline",
            "_current_version": 1,
            "_created": utcnow() - timedelta(minutes=3),
            "expired": utcnow() - timedelta(minutes=30),
        },
        {
            "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
            "_id": "item2",
            "state": "in_progress",
            "headline": "item 2",
            "type": "text",
            "slugline": "item 2 slugline",
            "_current_version": 1,
            "_created": utcnow() - timedelta(minutes=2),
            "expired": utcnow() - timedelta(minutes=30),
        },
        {
            "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
            "_id": "item3",
            "state": "in_progress",
            "headline": "item 2",
            "type": "text",
            "slugline": "item 2 slugline",
            "_current_version": 1,
            "_created": utcnow() - timedelta(minutes=1),
            "expired": utcnow() - timedelta(minutes=30),
        },
    ]

    get_resource_service(ARCHIVE).post(self.archive_items)
    for item in self.archive_items:
        resolve_document_version(item, ARCHIVE, "POST")
        insert_into_versions(id_=item["_id"])
def brief_internal_routing(item: dict, **kwargs):
    guid = item.get('guid', 'unknown')
    logger.info('macro started item=%s', guid)

    try:
        assert str(item['profile']) == str(_get_profile_id(TEXT_PROFILE)), 'profile is not text'
        assert get_word_count(item['body_html']) < 301, 'body is too long'
        # The title should not start with the word "CORRECTION"
        if item.get('headline'):
            title_start_with_correction = item['headline'].lstrip().startswith('CORRECTION')
            assert not title_start_with_correction, 'The headline/title should not start with word CORRECTION'
    except AssertionError as err:
        logger.info('macro stop on assert item=%s error=%s', guid, err)
        raise StopDuplication()
    except KeyError as err:
        logger.error(err)
        raise StopDuplication()

    item.setdefault('subject', [])
    item['urgency'] = 2
    item['profile'] = _get_profile_id(BRIEF_PROFILE)
    item['subject'] = _get_product_subject(_get_brief_subject(item.get('subject', [])))
    item['status'] = CONTENT_STATE.SCHEDULED
    item['operation'] = 'publish'

    _fix_headline(item)
    _fix_body_html(item)

    UTC_FIELD = 'utc_{}'.format(PUBLISH_SCHEDULE)
    try:
        published_at = item[SCHEDULE_SETTINGS][UTC_FIELD]
    except KeyError:
        published_at = utcnow()
    item[SCHEDULE_SETTINGS] = {
        'time_zone': 'Europe/Brussels',
    }

    # Set item publish schedule to 7:30 am for autopublish between 4 and 7 am
    is_press_headline = item.get('headline') and 'press' in item['headline'].lower()
    current_datetime = utc_to_local(superdesk.app.config['DEFAULT_TIMEZONE'], utcnow())
    if is_press_headline and time(4, 00) <= current_datetime.time() <= time(7, 00):
        item[PUBLISH_SCHEDULE] = current_datetime.replace(hour=7, minute=30, second=00)
        logger.info('Set publish schedule to 7:30 am for autopublish between 4 and 7 am item=%s',
                    item.get('guid', 'unknown'))
    else:
        # schedule +30m
        item[PUBLISH_SCHEDULE] = utc_to_local(item[SCHEDULE_SETTINGS]['time_zone'],
                                              published_at + timedelta(minutes=30))

    update_schedule_settings(item, PUBLISH_SCHEDULE, item[PUBLISH_SCHEDULE])
    item[PUBLISH_SCHEDULE] = item[PUBLISH_SCHEDULE].replace(tzinfo=None)

    # remove text in () brackets along with the brackets
    if item.get("headline"):
        title = re.sub(r"\([^()]*\)", "", item['headline'])
        item['headline'] = " ".join(title.split())

    # publish
    try:
        internal_destination_auto_publish(item)
    except StopDuplication:
        logger.info('macro done item=%s', guid)
    except DocumentError as err:
        logger.error('validation error when creating brief item=%s error=%s', guid, err)
    except Exception as err:
        logger.exception(err)

    # avoid another item being created
    raise StopDuplication()
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    """
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=UPDATE_TTL + 10):
        if sync:
            logger.error('update is already running for %s', provider['name'])
        return

    try:
        feeding_service = get_feeding_service(provider['feeding_service'])
        update = {LAST_UPDATED: utcnow()}

        if sync:
            provider[LAST_UPDATED] = utcnow() - timedelta(days=9999)  # import everything again

        generator = feeding_service.update(provider, update)
        if isinstance(generator, list):
            generator = (items for items in generator)
        failed = None
        while True:
            try:
                items = generator.send(failed)
                failed = ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
                update_last_item_updated(update, items)
            except StopIteration:
                break

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
def lock(self, item_filter, user_id, session_id, action):
    item_model = get_model(ItemModel)
    item = item_model.find_one(item_filter)
    if not item:
        raise SuperdeskApiError.notFoundError()

    # set the lock_id per item
    lock_id = "item_lock {}".format(item.get(config.ID_FIELD))

    # if the lock cannot be acquired, raise a forbidden exception
    if not lock(lock_id, expire=5):
        raise SuperdeskApiError.forbiddenError(message="Item is locked by another user.")

    try:
        can_user_lock, error_message = self.can_lock(item, user_id, session_id)

        if can_user_lock:
            self.app.on_item_lock(item, user_id)
            updates = {LOCK_USER: user_id, LOCK_SESSION: session_id, 'lock_time': utcnow()}
            if action:
                updates['lock_action'] = action
            item_model.update(item_filter, updates)

            if item.get(TASK):
                item[TASK]['user'] = user_id
            else:
                item[TASK] = {'user': user_id}

            superdesk.get_resource_service('tasks').assign_user(item[config.ID_FIELD], item[TASK])
            self.app.on_item_locked(item, user_id)
            push_notification('item:lock',
                              item=str(item.get(config.ID_FIELD)),
                              item_version=str(item.get(config.VERSION)),
                              user=str(user_id),
                              lock_time=updates['lock_time'],
                              lock_session=str(session_id))
        else:
            raise SuperdeskApiError.forbiddenError(message=error_message)

        item = item_model.find_one(item_filter)
        return item
    finally:
        # unlock the lock :)
        unlock(lock_id, remove=True)
class NewsMLG2Formatter(Formatter): """NewsML G2 Formatter""" XML_ROOT = '<?xml version="1.0" encoding="UTF-8"?>' now = utcnow() string_now = now.strftime('%Y-%m-%dT%H:%M:%S.0000Z') _message_nsmap = {None: 'http://iptc.org/std/nar/2006-10-01/', 'x': 'http://www.w3.org/1999/xhtml', 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'} _debug_message_extra = {'{{{}}}schemaLocation'.format(_message_nsmap['xsi']): 'http://iptc.org/std/nar/2006-10-01/ \ http://www.iptc.org/std/NewsML-G2/2.18/specification/NewsML-G2_2.18-spec-All-Power.xsd'} def format(self, article, subscriber, codes=None): """Create article in NewsML G2 format :param dict article: :param dict subscriber: :param list codes: selector codes :return [(int, str)]: return a List of tuples. A tuple consist of publish sequence number and formatted article string. :raises FormatterError: if the formatter fails to format an article """ try: pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber) is_package = self._is_package(article) news_message = etree.Element('newsMessage', attrib=self._debug_message_extra, nsmap=self._message_nsmap) self._format_header(article, news_message, pub_seq_num) item_set = self._format_item(news_message) if is_package: item = self._format_item_set(article, item_set, 'packageItem') self._format_groupset(article, item) elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}: item = self._format_item_set(article, item_set, 'newsItem') self._format_contentset(article, item) else: nitfFormater = NITFFormatter() nitf = nitfFormater.get_nitf(article, subscriber, pub_seq_num) newsItem = self._format_item_set(article, item_set, 'newsItem') self._format_content(article, newsItem, nitf) return [(pub_seq_num, self.XML_ROOT + etree.tostring(news_message).decode('utf-8'))] except Exception as ex: raise FormatterError.newmsmlG2FormatterError(ex, subscriber) def _is_package(self, article): """Given an article returns if it is a none takes package or not :param artcile: :return: True is package """ return article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and article.get(PACKAGE_TYPE, '') == '' def _format_header(self, article, news_message, pub_seq_num): """Creates the header element of the newsMessage. 
:param dict article: :param Element news_message: :param int pub_seq_num: """ header = SubElement(news_message, 'header') SubElement(header, 'sent').text = self.string_now SubElement(header, 'sender').text = get_newsml_provider_id() SubElement(header, 'transmitId').text = str(pub_seq_num) SubElement(header, 'priority').text = str(article.get('priority', 5)) SubElement(header, 'origin').text = article.get('original_source', article.get('source', '')) def _format_item(self, news_message): return SubElement(news_message, 'itemSet') def _format_item_set(self, article, item_set, item_type): """Construct the item element (newsItem or packageItem) and append the item_meta and contentMeta entities :param dict article: :param element item_set: :param str item_type: """ item = SubElement(item_set, item_type, attrib={'standard': 'NewsML-G2', 'standardversion': '2.18', 'guid': article['guid'], 'version': str(article[superdesk.config.VERSION]), XML_LANG: article.get('language', 'en'), 'conformance': 'power'}) SubElement(item, 'catalogRef', attrib={'href': 'http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_25.xml'}) self._format_rights(item, article) item_meta = SubElement(item, 'itemMeta') self._format_itemClass(article, item_meta) self._format_provider(item_meta) self._format_versioncreated(article, item_meta) self._format_firstcreated(article, item_meta) self._format_pubstatus(article, item_meta) if article.get(EMBARGO): SubElement(item_meta, 'embargoed').text = \ get_utc_schedule(article, EMBARGO).isoformat() # optional properties self._format_ednote(article, item_meta) self._format_signal(article, item_meta) content_meta = SubElement(item, 'contentMeta') SubElement(content_meta, 'urgency').text = str(article.get('urgency', 5)) self._format_timestamps(article, content_meta) self._format_creator(article, content_meta) self._format_located(article, content_meta) self._format_subject(article, content_meta) self._format_genre(article, content_meta) self._format_slugline(article, content_meta) self._format_headline(article, content_meta) self._format_place(article, content_meta) self._format_category(article, content_meta) self._format_company_codes(article, content_meta, item) if article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}: self._format_description(article, content_meta) self._format_creditline(article, content_meta) return item def _format_content(self, article, news_item, nitf): """Adds the content set to the xml :param dict article: :param Element newsItem: :param Element nitf: """ content_set = SubElement(news_item, 'contentSet') if article.get(FORMAT) == FORMATS.PRESERVED: inline_data = get_text(self.append_body_footer(article)) SubElement(content_set, 'inlineData', attrib={'contenttype': 'text/plain'}).text = inline_data elif article[ITEM_TYPE] in [CONTENT_TYPE.TEXT, CONTENT_TYPE.COMPOSITE]: inline = SubElement(content_set, 'inlineXML', attrib={'contenttype': 'application/nitf+xml'}) inline.append(nitf) def _format_rights(self, newsItem, article): """Adds the rightsholder section to the newsItem :param Element newsItem: :param dict article: """ rights = superdesk.get_resource_service('vocabularies').get_rightsinfo(article) rightsinfo = SubElement(newsItem, 'rightsInfo') holder = SubElement(rightsinfo, 'copyrightHolder') SubElement(holder, 'name').text = rights['copyrightholder'] SubElement(rightsinfo, 'copyrightNotice').text = rights['copyrightnotice'] SubElement(rightsinfo, 'usageTerms').text = rights['usageterms'] # itemClass elements def 
_format_itemClass(self, article, item_meta): """Append the item class to the item_meta data element :param dict article: :param Element item_meta: """ if CONTENT_TYPE.COMPOSITE and self._is_package(article): SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:composite'}) return if article[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED, CONTENT_TYPE.COMPOSITE}: SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:text'}) elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}: SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:%s' % article[ITEM_TYPE].lower()}) def _format_provider(self, item_meta): """Appends the provider element to the item_meta element :param dict article: :param Element item_meta: """ provider = SubElement(item_meta, 'provider') SubElement(provider, 'name').text = get_newsml_provider_id() def _format_versioncreated(self, article, item_meta): """Appends the versionCreated element to the item_meta element. :param dict article: :param Element item_meta: """ SubElement(item_meta, 'versionCreated').text = article['versioncreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00') def _format_firstcreated(self, article, item_meta): """Appends the firstCreated element to the item_meta element. :param dict article: :param Element item_meta: """ SubElement(item_meta, 'firstCreated').text = article['firstcreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00') def _format_pubstatus(self, article, item_meta): """Appends the pubStatus element to the item_meta element. :param dict article: :param Element item_meta: """ SubElement(item_meta, 'pubStatus', attrib={'qcode': 'stat:' + article.get('pubstatus', 'usable')}) def _format_signal(self, article, item_meta): """Appends the signal element to the item_meta element. :param dict article: :param Element item_meta: """ if article['state'] == 'Corrected': SubElement(item_meta, 'signal', attrib={'qcode': 'sig:correction'}) else: SubElement(item_meta, 'signal', attrib={'qcode': 'sig:update'}) def _format_ednote(self, article, item_meta): """Appends the edNote element to the item_meta element. :param dict article: :param Element item_meta: """ if 'ednote' in article and article.get('ednote', '') != '': SubElement(item_meta, 'edNote').text = article.get('ednote', '') # contentMeta elements def _format_timestamps(self, article, content_meta): """Appends the contentCreated and contentModified element to the contentMeta element. 
:param dict article: :param Element content_meta: """ SubElement(content_meta, 'contentCreated').text = article['firstcreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00') SubElement(content_meta, 'contentModified').text = article['versioncreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00') def _format_creator(self, article, content_meta): """Appends the creator element to the contentMeta element :param dict article: :param Element content_meta: """ if 'byline' in article: creator = SubElement(content_meta, 'creator') SubElement(creator, 'name').text = article.get('byline', '') or '' def _format_subject(self, article, content_meta): """Appends the subject element to the contentMeta element :param dict article: :param Element content_meta: """ if 'subject' in article and len(article['subject']) > 0: for s in article['subject']: if 'qcode' in s: subj = SubElement(content_meta, 'subject', attrib={'type': 'cpnat:abstract', 'qcode': 'subj:' + s['qcode']}) SubElement(subj, 'name', attrib={XML_LANG: 'en'}).text = s['name'] def _format_genre(self, article, content_meta): """Appends the genre element to the contentMeta element :param dict article: :param Element content_meta: """ if 'genre' in article and len(article['genre']) > 0: for g in article['genre']: genre = SubElement(content_meta, 'genre') SubElement(genre, 'name', attrib={XML_LANG: 'en'}).text = g.get('name', '') def _format_category(self, article, content_meta): """Appends the subject element to the contentMeta element :param dict article: :param Element content_meta: """ for category in article.get('anpa_category', []): subject = SubElement(content_meta, 'subject', attrib={'type': 'cpnat:abstract', 'qcode': 'cat:' + category['qcode']}) SubElement(subject, 'name', attrib={XML_LANG: 'en'}).text = category.get('name', '') def _format_slugline(self, article, content_meta): """Appends the slugline element to the contentMeta element :param dict article: :param Element content_meta: """ SubElement(content_meta, 'slugline').text = article.get('slugline', '') def _format_headline(self, article, content_meta): """Appends the headline element to the contentMeta element :param dict article: :param Element content_meta: """ SubElement(content_meta, 'headline').text = article.get('headline', '') def _format_place(self, article, content_meta): """Appends the subject (of type geoArea) element to the contentMeta element :param dict article: :param Element content_meta: """ if not article.get('place'): return for place in article.get('place', []): if place.get('state'): subject = self._create_subject_element(content_meta, place.get('state'), 'loctyp:CountryArea') self._create_broader_element(subject, place.get('country'), 'loctyp:Country') self._create_broader_element(subject, place.get('world_region'), 'loctyp:WorldArea') elif place.get('country'): subject = self._create_subject_element(content_meta, place.get('country'), 'loctyp:Country') self._create_broader_element(subject, place.get('world_region'), 'loctyp:WorldArea') elif place.get('world_region'): self._create_subject_element(content_meta, place.get('world_region'), 'loctyp:WorldArea') def _create_broader_element(self, parent, broader_name, qcode, concept_type='cpnat:abstract'): """Create broader element. 
:param element parent: parent element under which the broader element is created :param str broader_name: value for the name element :param str qcode: :param str concept_type: """ if broader_name: broader_elm = SubElement(parent, 'broader', attrib={'type': concept_type, 'qcode': qcode}) SubElement(broader_elm, 'name').text = broader_name def _create_subject_element(self, parent, subject_name, qcode, concept_type='cpnat:abstract'): """Create a subject element :param element parent: :param str subject_name: value for the name element :param str qcode: :param str concept_type: :return: returns the subject element. """ subject_elm = SubElement(parent, 'subject', attrib={'type': concept_type, 'qcode': qcode}) SubElement(subject_elm, 'name').text = subject_name return subject_elm def _format_located(self, article, content_meta): """Appends the located element to the contentMeta element :param dict article: :param Element content_meta: """ located = article.get('dateline', {}).get('located', {}) if located and located.get('city'): located_elm = SubElement(content_meta, 'located', attrib={'type': 'cpnat:abstract', 'qcode': 'loctyp:City'}) SubElement(located_elm, "name").text = located.get('city') self._create_broader_element(located_elm, located.get('state'), 'loctyp:CountryArea') self._create_broader_element(located_elm, located.get('country'), 'loctyp:Country') if article.get('dateline', {}).get('text'): SubElement(content_meta, 'dateline').text = article.get('dateline', {}).get('text') def _format_description(self, article, content_meta): """Appends the image description to the contentMeta element :param article: :param content_meta: """ SubElement(content_meta, 'description', attrib={'role': 'drol:caption'}).text = article.get('description', '') def _format_creditline(self, article, content_meta): """Append the creditLine to the contentMeta for a picture :param article: :param content_meta: """ SubElement(content_meta, 'creditline').text = article.get('original_source', article.get('source', '')) def _format_groupset(self, article, item): """Constructs the groupSet element of a packageItem :param article: :param item: :return: groupSet appended to the item """ groupSet = SubElement(item, 'groupSet', attrib={'root': 'root'}) for group in article.get(GROUPS, []): group_Elem = SubElement(groupSet, 'group', attrib={'id': group.get(GROUP_ID), 'role': group.get(ROLE)}) for ref in group.get(REFS, []): if ID_REF in ref: SubElement(group_Elem, 'groupRef', attrib={'idref': ref.get(ID_REF)}) elif RESIDREF in ref: # get the current archive item being referred to archive_item = superdesk.get_resource_service(ARCHIVE).find_one(req=None, _id=ref.get(RESIDREF)) if archive_item: itemRef = SubElement(group_Elem, 'itemRef', attrib={'residref': ref.get(RESIDREF), 'contenttype': 'application/vnd.iptc.g2.newsitem+xml'}) SubElement(itemRef, 'itemClass', attrib={'qcode': 'ninat:' + ref.get(ITEM_TYPE, 'text')}) self._format_pubstatus(archive_item, itemRef) self._format_headline(archive_item, itemRef) self._format_slugline(archive_item, itemRef) def _format_contentset(self, article, item): """Constructs the contentSet element in a picture, video or audio newsItem.
:param article: :param item: :return: contentSet Element added to the item """ content_set = SubElement(item, 'contentSet') for rendition, value in article.get('renditions', {}).items(): attrib = {'href': value.get('href'), 'contenttype': value.get('mimetype', ''), 'rendition': 'rendition:' + rendition } if article.get(ITEM_TYPE) == CONTENT_TYPE.PICTURE: if 'height' in value: attrib['height'] = str(value.get('height')) if 'width' in value: attrib['width'] = str(value.get('width')) elif article.get(ITEM_TYPE) in {CONTENT_TYPE.VIDEO, CONTENT_TYPE.AUDIO}: if get_filemeta(article, 'width'): attrib['width'] = str(get_filemeta(article, 'width')) if get_filemeta(article, 'height'): attrib['height'] = str(get_filemeta(article, 'height')) if get_filemeta(article, 'duration'): attrib['duration'] = get_filemeta(article, 'duration') attrib['durationunit'] = 'timeunit:normalPlayTime' if rendition == 'original' and get_filemeta(article, 'length'): attrib['size'] = str(get_filemeta(article, 'length')) SubElement(content_set, 'remoteContent', attrib=attrib) def _format_company_codes(self, article, content_meta, item): """Format company codes. For each company in the article, appends a subject element to the contentMeta element and an assert element to the item :param article: object having published article details :type article: dict :param content_meta: object representing <contentMeta> in the XML tree :type content_meta: lxml.etree.Element :param item: object representing <newsItem> in the XML tree :type item: lxml.etree.Element """ for company in article.get('company_codes', []): literal_name = company['qcode'] subject = SubElement(content_meta, 'subject', attrib={'type': 'cpnat:organisation', 'literal': literal_name}) SubElement(subject, 'name').text = company.get('name', '') assert_element = SubElement(item, 'assert', attrib={'literal': literal_name}) org_details_element = SubElement(assert_element, 'organisationDetails') SubElement(org_details_element, 'hasInstrument', attrib={'symbol': company.get('qcode', ''), 'marketlabel': company.get('security_exchange', '')}) def can_format(self, format_type, article): """Check whether the article can be formatted as NewsML G2. :param str format_type: :param dict article: :return: True if the article can be formatted, False otherwise """ return format_type == 'newsmlg2' and \ article[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED, CONTENT_TYPE.COMPOSITE, CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO, CONTENT_TYPE.AUDIO}
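# A minimal usage sketch for the formatter methods above: it shows how the
# item_meta helpers compose into a NewsML G2 fragment. The standalone function
# and the stub article below are illustrative assumptions, not the project's
# public API (the real formatter drives these methods from a format() entry
# point that is not shown in this excerpt).
from datetime import datetime, timezone
from xml.etree.ElementTree import Element, SubElement, tostring

def build_item_meta_sketch(article):
    # mirror the order used above: itemClass, pubStatus, versionCreated
    item_meta = Element('itemMeta')
    SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:text'})
    SubElement(item_meta, 'pubStatus', attrib={'qcode': 'stat:' + article.get('pubstatus', 'usable')})
    SubElement(item_meta, 'versionCreated').text = article['versioncreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00')
    return item_meta

stub_article = {'pubstatus': 'usable', 'versioncreated': datetime(2015, 6, 13, tzinfo=timezone.utc)}
print(tostring(build_item_meta_sketch(stub_article)).decode())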
class RemoveSpikedContentTestCase(TestCase): articles = [{'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9', '_id': '1', 'type': 'text', 'last_version': 3, '_current_version': 4, 'body_html': 'Test body', 'urgency': 4, 'headline': 'Two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article', 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject':[{'qcode': '17004000', 'name': 'Statistics'}, {'qcode': '04001002', 'name': 'Weather'}], 'state': 'draft', 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#1'}, {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a974-xy4532fe33f9', '_id': '2', 'last_version': 3, '_current_version': 4, 'body_html': 'Test body of the second article', 'urgency': 4, 'headline': 'Another two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article', 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject':[{'qcode': '17004000', 'name': 'Statistics'}, {'qcode': '04001002', 'name': 'Weather'}], 'expiry': utcnow() + timedelta(minutes=20), 'state': 'draft', 'type': 'text', 'unique_name': '#2'}, {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4fa', '_id': '3', '_current_version': 4, 'body_html': 'Test body', 'urgency': 4, 'headline': 'Two students missing killed', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article killed', 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject':[{'qcode': '17004000', 'name': 'Statistics'}, {'qcode': '04001002', 'name': 'Weather'}], 'state': 'draft', 'expiry': utcnow() + timedelta(minutes=20), 'type': 'text', 'unique_name': '#3'}, {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4fc', '_id': '4', '_current_version': 3, 'state': 'draft', 'type': 'composite', 'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}], 'role': 'grpRole:NEP'}, { 'id': 'main', 'refs': [ { 'location': 'archive', 'guid': '1', 'residRef': '1', 'type': 'text' }, { 'location': 'archive', 'residRef': '2', 'guid': '2', 'type': 'text' } ], 'role': 'grpRole:main'}], 'firstcreated': utcnow(), 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#4'}, {'guid': 'tag:localhost:2015:69b961ab-4b8a-a584-2816-a7b402fed4fc', '_id': '5', '_current_version': 3, 'state': 'draft', 'type': 'composite', 'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}, {'idRef': 'story'}], 'role': 'grpRole:NEP'}, { 'id': 'main', 'refs': [ { 'location': 'archive', 'guid': '1', 'residRef': '1', 'type': 'text' } ], 'role': 'grpRole:main'}, { 'id': 'story', 'refs': [ { 'location': 'archive', 'guid': '4', 'residRef': '4', 'type': 'composite' } ], 'role': 'grpRole:story'}], 'firstcreated': utcnow(), 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#5'}] media = { 'viewImage': { 'media': '1592730d582080f4e9fcc2fcf43aa357bda0ed19ffe314ee3248624cd4d4bc54', 'mimetype': 'image/jpeg', 'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 452, 'width': 640 }, 'thumbnail': { 'media': '52250b4f37da50ee663fdbff057a5f064479f8a8bbd24fb8fdc06135d3f807bb', 'mimetype': 'image/jpeg', 'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 120, 'width': 169 }, 'baseImage': { 'media': '7a608aa8f51432483918027dd06d0ef385b90702bfeba84ac4aec38ed1660b18', 'mimetype': 'image/jpeg', 'href': 
'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 990, 'width': 1400 }, 'original': { 'media': 'stub.jpeg', 'mimetype': 'image/jpeg', 'href': 'http://192.168.220.209/api/upload/stub.jpeg/raw?_schema=http', 'height': 2475, 'width': 3500 } } def test_query_getting_expired_content(self): now = utcnow() self.app.data.insert(ARCHIVE, [ {'expiry': get_expiry_date(0), 'state': 'spiked'}, {'expiry': get_expiry_date(10), 'state': 'spiked'}, {'expiry': get_expiry_date(20), 'state': 'spiked'}, {'expiry': get_expiry_date(30), 'state': 'spiked'}, {'expiry': None, 'state': 'spiked'}, {'unique_id': 97, 'state': 'spiked'}, {'expiry': now - timedelta(minutes=10), 'state': 'spiked', 'unique_id': 100}, ]) for expired_items in get_resource_service(ARCHIVE).get_expired_items(now): self.assertEqual(1, len(expired_items)) self.assertEqual(100, expired_items[0]['unique_id']) def test_remove_media_files_for_picture(self): item = { '_id': 'testimage', 'type': 'picture', 'renditions': self.media } original = item.copy() with patch.object(self.app.media, 'delete') as media_delete: CropService().update_media_references(item, original) references_service = get_resource_service('media_references') refs = references_service.get(req=None, lookup={'item_id': 'testimage'}) self.assertEqual(refs.count(), 4) for ref in refs: self.assertEqual(ref.get('published'), False) CropService().update_media_references(item, original, True) refs = references_service.get(req=None, lookup={'item_id': 'testimage'}) for ref in refs: self.assertEqual(ref.get('published'), True) remove_media_files(item) self.assertEqual(0, media_delete.call_count) item = { '_id': 'testimage2', 'type': 'picture', 'renditions': self.media } original = item.copy() CropService().update_media_references(item, original) references_service = get_resource_service('media_references') refs = references_service.get(req=None, lookup={'item_id': 'testimage2'}) self.assertEqual(refs.count(), 4) for ref in refs: self.assertEqual(ref.get('published'), False) remove_media_files(item) self.assertEqual(0, media_delete.call_count) item = { '_id': 'testimage3', 'type': 'picture', 'renditions': { 'viewImage': { 'media': '123', 'mimetype': 'image/jpeg', 'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 452, 'width': 640 }, 'thumbnail': { 'media': '456', 'mimetype': 'image/jpeg', 'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 120, 'width': 169 } } } original = item.copy() CropService().update_media_references(item, original) references_service = get_resource_service('media_references') refs = references_service.get(req=None, lookup={'item_id': 'testimage3'}) self.assertEqual(refs.count(), 2) for ref in refs: self.assertEqual(ref.get('published'), False) remove_media_files(item) self.assertEqual(2, media_delete.call_count) for key, rendition in item.get('renditions').items(): media_delete.assert_any_call(rendition['media']) def test_remove_media_files_for_picture_associations(self): item = { '_id': 'testimage', 'type': 'text', 'associations': { 'featuremedia': { '_id': '123', 'renditions': self.media }, 'featurevideo': { '_id': '456', 'renditions': { 'viewImage': { 'media': 'testing_123', 'mimetype': 'image/jpeg', 'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 452, 'width': 640 }, 'thumbnail': { 'media': 'testing_456', 'mimetype': 'image/jpeg', 'href':
'http://192.168.220.209/api/upload/abc/raw?_schema=http', 'height': 120, 'width': 169 } } } } } original = item.copy() with patch.object(self.app.media, 'delete') as media_delete: CropService().update_media_references(item, original) references_service = get_resource_service('media_references') refs = references_service.get(req=None, lookup={'item_id': 'testimage'}) self.assertEqual(refs.count(), 6) for ref in refs: self.assertEqual(ref.get('published'), False) CropService().update_media_references(item, original, True) refs = references_service.get(req=None, lookup={'item_id': 'testimage'}) for ref in refs: self.assertEqual(ref.get('published'), True) remove_media_files(item) self.assertEqual(0, media_delete.call_count) def test_delete_by_ids(self): ids = self.app.data.insert(ARCHIVE, self.articles) archive_service = get_resource_service(ARCHIVE) archive_service.on_delete = MagicMock() archive_service.delete_by_article_ids(ids) self.assertTrue(self.app.data.mongo.is_empty(ARCHIVE)) self.assertTrue(self.app.data.elastic.is_empty(ARCHIVE)) self.assertEqual(len(self.articles), archive_service.on_delete.call_count) def test_remove_renditions_from_all_versions(self): renditions = copy.copy(self.media) ids = self.app.data.insert(ARCHIVE, [{ 'state': 'spiked', 'expiry': get_expiry_date(-10), 'type': 'picture', 'renditions': {}, }]) self.app.data.insert('archive_versions', [{ '_id_document': ids[0], 'type': 'picture', 'renditions': renditions, }]) with patch.object(self.app.media, 'delete') as media_delete: get_resource_service('archive').delete_by_article_ids(ids) for key, rendition in renditions.items(): media_delete.assert_any_call(rendition['media']) def _get_original(self, _id): return self.app.data.find_one(ARCHIVE, None, _id=_id)
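# The media tests above depend on one rule: a rendition file is only deleted
# when no other item still references it. A stripped-down sketch of that rule,
# assuming a plain list of reference dicts (the real service resolves these
# through the 'media_references' resource):
def remove_media_files_sketch(item, references, delete):
    for rendition in item.get('renditions', {}).values():
        media_id = rendition.get('media')
        referenced_elsewhere = any(
            ref.get('media') == media_id and ref.get('item_id') != item['_id']
            for ref in references)
        if media_id and not referenced_elsewhere:
            delete(media_id)  # e.g. the storage backend's delete call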
def fetch(self, docs, id=None, **kwargs): id_of_fetched_items = [] for doc in docs: id_of_item_to_be_fetched = doc.get(config.ID_FIELD) if id is None else id desk_id = doc.get('desk') stage_id = doc.get('stage') ingest_service = get_resource_service('ingest') ingest_doc = ingest_service.find_one(req=None, _id=id_of_item_to_be_fetched) if not ingest_doc: raise SuperdeskApiError.notFoundError( _('Failed to find ingest item with _id: {id}').format( id=id_of_item_to_be_fetched)) if not is_workflow_state_transition_valid('fetch_from_ingest', ingest_doc[ITEM_STATE]): raise InvalidStateTransitionError() if doc.get('macro'): # there is a macro so transform it ingest_doc = get_resource_service('macros').execute_macro( ingest_doc, doc.get('macro')) archived = utcnow() ingest_service.patch(id_of_item_to_be_fetched, {'archived': archived}) dest_doc = dict(ingest_doc) if doc.get('target'): dest_doc.update(doc.get('target')) new_id = generate_guid(type=GUID_TAG) id_of_fetched_items.append(new_id) dest_doc[config.ID_FIELD] = new_id dest_doc[GUID_FIELD] = new_id generate_unique_id_and_name(dest_doc) dest_doc[config.VERSION] = 1 dest_doc['versioncreated'] = archived send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id) dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED) dest_doc[FAMILY_ID] = ingest_doc[config.ID_FIELD] dest_doc[INGEST_ID] = self.__strip_version_from_guid( ingest_doc[GUID_FIELD], ingest_doc.get('version')) dest_doc[INGEST_VERSION] = ingest_doc.get('version') dest_doc[ITEM_OPERATION] = ITEM_FETCH remove_unwanted(dest_doc) set_original_creator(dest_doc) self.__fetch_items_in_package( dest_doc, desk_id, stage_id, doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)) self.__fetch_associated_items( dest_doc, desk_id, stage_id, doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)) desk = get_resource_service('desks').find_one(req=None, _id=desk_id) if desk and desk.get('default_content_profile'): dest_doc['profile'] = desk['default_content_profile'] if dest_doc.get('type', 'text') in MEDIA_TYPES: dest_doc['profile'] = None get_resource_service(ARCHIVE).post([dest_doc]) insert_into_versions(doc=dest_doc) build_custom_hateoas(custom_hateoas, dest_doc) superdesk.item_fetched.send(self, item=dest_doc, ingest_item=ingest_doc) doc.update(dest_doc) if kwargs.get('notify', True): ingest_doc.update({'task': dest_doc.get('task')}) push_item_move_notification(ingest_doc, doc, 'item:fetch') return id_of_fetched_items
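# Usage sketch for fetch() above; the payload shape follows the keys the
# method reads ('desk', 'stage', optional 'macro', 'target', ITEM_STATE), and
# the ids and service handle here are illustrative assumptions:
# docs = [{'_id': 'ingest-item-id', 'desk': desk_id, 'stage': stage_id}]
# new_ids = fetch_service.fetch(docs)
# Each ingest item is copied into the archive with a fresh guid, version 1,
# and FAMILY_ID/INGEST_ID pointing back at the originating ingest item.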
def _can_remove_item(self, item, processed_item=None): """Recursively checks if the item can be removed. :param dict item: item to be removed :param dict processed_item: items already processed, used as a guard against reference cycles :return: True if item can be removed, False otherwise. """ if processed_item is None: processed_item = dict() item_refs = [] package_service = PackageService() archive_service = get_resource_service(ARCHIVE) if item.get(ITEM_TYPE) == CONTENT_TYPE.COMPOSITE: # Get the item references for this package item_refs = package_service.get_residrefs(item) if item.get(ITEM_TYPE) in [ CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED ]: broadcast_items = get_resource_service( 'archive_broadcast').get_broadcast_items_from_master_story( item) # If the master story expires then check if the broadcast item is included in a package. # If included in a package then check the package expiry. item_refs.extend([ broadcast_item.get(config.ID_FIELD) for broadcast_item in broadcast_items ]) if item.get('rewrite_of'): item_refs.append(item.get('rewrite_of')) if item.get('rewritten_by'): item_refs.append(item.get('rewritten_by')) # get the list of associated item ids if item.get(ITEM_TYPE) in MEDIA_TYPES: item_refs.extend(self._get_associated_items(item)) # get the ids of packages in which this item is referenced item_refs.extend(package_service.get_linked_in_package_ids(item)) # check whether the item itself has expired is_expired = item.get('expiry') and item.get('expiry') < utcnow() if is_expired: # now check recursively for all references if item.get(config.ID_FIELD) in processed_item: return is_expired processed_item[item.get(config.ID_FIELD)] = item if item_refs: archive_items = archive_service.get_from_mongo( req=None, lookup={'_id': { '$in': item_refs }}) for archive_item in archive_items: is_expired = self._can_remove_item(archive_item, processed_item) if not is_expired: break return is_expired
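# The processed_item dict above doubles as a cycle guard, so mutually
# referencing packages cannot recurse forever. The same traversal over a plain
# in-memory graph (the ids and the 'refs'/'expired' layout are illustrative,
# and every ref is assumed to resolve in items_by_id):
def can_remove_sketch(item, items_by_id, processed=None):
    if processed is None:
        processed = {}
    if not item.get('expired'):
        return False
    if item['_id'] in processed:
        return True  # already being evaluated higher up the call stack
    processed[item['_id']] = item
    return all(can_remove_sketch(items_by_id[ref], items_by_id, processed)
               for ref in item.get('refs', []))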
def test_is_old_content(self): service = FileFeedingService() self.assertFalse(service.is_old_content(utcnow())) self.assertTrue( service.is_old_content(utcnow() - timedelta(minutes=11)))
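# A plausible reading of is_old_content, consistent with the assertions above:
# anything older than a ten-minute ingest window counts as old. The cutoff is
# an assumption inferred from the 11-minute fixture, not a documented value.
from datetime import timedelta

def is_old_content_sketch(last_updated, now, window=timedelta(minutes=10)):
    return last_updated < now - window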
def test_validate_schedule_date_with_datetime_in_past_raises_superdeskApiError( self): self.assertRaises(SuperdeskApiError, validate_schedule, utcnow() + timedelta(hours=-2))
def _get_planning_date_filters(self, request): """Get date filters for planning resource :param request: object representing the HTTP request """ params = request.args or MultiDict() date_filter_param, start_date, end_date = self._parse_date_params(params) date_filter_param = (date_filter_param or '').lower() if not (date_filter_param or start_date or end_date): return { 'nested': { 'path': '_planning_schedule', 'filter': { 'range': { '_planning_schedule.scheduled': { 'gte': 'now/d', 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) } } } } } start_of_week = self._get_start_of_week(params) date_filters = { 'range': { '_planning_schedule.scheduled': { 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) } } } if date_filter_param == 'today': date_filters['range']['_planning_schedule.scheduled']['gte'] = 'now/d' date_filters['range']['_planning_schedule.scheduled']['lt'] = 'now+24h/d' elif date_filter_param == 'tomorrow': date_filters['range']['_planning_schedule.scheduled']['gte'] = 'now+24h/d' date_filters['range']['_planning_schedule.scheduled']['lt'] = 'now+48h/d' elif date_filter_param == 'this_week': end_of_this_week = get_start_of_next_week(None, start_of_week) start_of_this_week = end_of_this_week - timedelta(days=7) date_filters['range']['_planning_schedule.scheduled']['gte'] = \ '{}||/d'.format(start_of_this_week.strftime(config.ELASTIC_DATE_FORMAT)) date_filters['range']['_planning_schedule.scheduled']['lt'] = \ '{}||/d'.format(end_of_this_week.strftime(config.ELASTIC_DATE_FORMAT)) elif date_filter_param == 'next_week': start_of_next_week = get_start_of_next_week(None, start_of_week) end_of_next_week = start_of_next_week + timedelta(days=7) date_filters['range']['_planning_schedule.scheduled']['gte'] = \ '{}||/d'.format(start_of_next_week.strftime(config.ELASTIC_DATE_FORMAT)) date_filters['range']['_planning_schedule.scheduled']['lt'] = \ '{}||/d'.format(end_of_next_week.strftime(config.ELASTIC_DATE_FORMAT)) else: if start_date: date_filters['range']['_planning_schedule.scheduled']['gte'] = start_date if end_date: date_filters['range']['_planning_schedule.scheduled']['lte'] = end_date return { 'nested': { 'path': '_planning_schedule', 'filter': date_filters, } }
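# For reference, the 'today' branch above produces a nested filter of this
# shape (the time_zone value depends on DEFAULT_TIMEZONE; '+00:00' is just an
# example):
# {'nested': {'path': '_planning_schedule',
#             'filter': {'range': {'_planning_schedule.scheduled': {
#                 'gte': 'now/d', 'lt': 'now+24h/d', 'time_zone': '+00:00'}}}}}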
def test_validate_schedule_at_utc_zero_hours(self): validate_schedule( (utcnow() + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0))
def test_text_formatter(self): embargo_ts = (utcnow() + timedelta(days=2)) article = { '_id': 'tag:aap.com.au:20150613:12345', 'guid': 'tag:aap.com.au:20150613:12345', '_current_version': 1, 'anpa_category': [{ 'qcode': 'a' }], 'source': 'AAP', 'headline': 'This is a test headline', 'byline': 'joe', 'slugline': 'slugline', 'subject': [{ 'qcode': '02011001', 'name': 'international court or tribunal', 'parent': None }, { 'qcode': '02011002', 'name': 'extradition' }], 'anpa_take_key': 'take_key', 'unique_id': '1', 'body_html': 'The story body', 'type': 'text', 'word_count': '1', 'priority': 1, 'profile': 'snap', 'state': 'published', 'urgency': 2, 'pubstatus': 'usable', 'creditline': 'sample creditline', 'keywords': ['traffic'], 'abstract': '<p>sample <b>abstract</b></p>', 'place': [{ 'name': 'Australia', 'qcode': 'NSW' }], 'embargo': embargo_ts, 'body_footer': '<p>call helpline 999 if you are planning to quit smoking</p>', 'company_codes': [{ 'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX' }], 'genre': [{ 'name': 'Article', 'qcode': 'article' }], 'flags': { 'marked_for_legal': True }, 'extra': { 'foo': 'test' }, } seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0] expected = { "guid": "tag:aap.com.au:20150613:12345", "version": "1", "place": [{ 'name': 'Australia', 'code': 'NSW' }], "pubstatus": "usable", "body_html": "The story body<p>call helpline 999 if you are planning to quit smoking</p>", "type": "text", "subject": [{ "code": "02011001", "name": "international court or tribunal" }, { "code": "02011002", "name": "extradition" }], "service": [{ "code": "a" }], "source": "AAP", "headline": "This is a test headline", "byline": "joe", "urgency": 2, "priority": 1, "embargoed": embargo_ts.isoformat(), "profile": "snap", "slugline": "slugline", "description_text": "sample abstract", "description_html": "<p>sample <b>abstract</b></p>", 'keywords': ['traffic'], 'organisation': [{ 'name': 'YANCOAL AUSTRALIA LIMITED', 'rel': 'Securities Identifier', 'symbols': [{ 'ticker': 'YAL', 'exchange': 'ASX' }] }], 'genre': [{ 'name': 'Article', 'code': 'article' }], 'signal': [{ 'name': 'Content Warning', 'code': 'cwarn', 'scheme': 'http://cv.iptc.org/newscodes/signal/' }], 'extra': { 'foo': 'test' }, } self.assertEqual(json.loads(doc), expected)
def setUp(self): self.req = ParsedRequest() with self.app.test_request_context(self.app.config.get('URL_PREFIX')): self.articles = [{ '_id': '1', 'urgency': 1, 'headline': 'story', 'state': 'fetched' }, { '_id': '2', 'headline': 'prtorque', 'state': 'fetched' }, { '_id': '3', 'urgency': 3, 'state': 'fetched', 'flags': { 'marked_for_sms': True } }, { '_id': '4', 'urgency': 4, 'state': 'fetched', 'task': { 'desk': '1' }, 'ingest_provider': '1' }, { '_id': '5', 'urgency': 2, 'state': 'fetched', 'task': { 'desk': '2' }, 'priority': 3 }, { '_id': '6', 'state': 'fetched', 'embargo': utcnow(), 'schedule_settings': { 'utc_embargo': utcnow() + timedelta(minutes=20) } }, { '_id': '7', 'genre': [{ 'name': 'Sidebar' }], 'state': 'fetched' }, { '_id': '8', 'subject': [{ 'name': 'adult education', 'qcode': '05001000', 'parent': '05000000' }, { 'name': 'high schools', 'qcode': '05005003', 'parent': '05005000' }], 'state': 'fetched' }, { '_id': '9', 'state': 'fetched', 'anpa_category': [{ 'qcode': 'a', 'name': 'Aus News' }] }, { '_id': '10', 'body_html': '<p>Mention<p>', 'embargo': utcnow(), 'schedule_settings': { 'utc_embargo': utcnow() - timedelta(minutes=20) } }, { '_id': '11', 'place': [{ 'qcode': 'NSW', 'name': 'NSW' }], 'state': 'fetched' }, { '_id': '12', 'body_html': '<div> \n \n<body dir=\"ltr\"> \n<div> \n' '<span>SDA</span><br/> \n</div> \n \n</body>' ' \n</div>', 'embargo': utcnow() }] self.app.data.insert('archive', self.articles) self.app.data.insert('filter_conditions', [{ '_id': 1, 'field': 'headline', 'operator': 'like', 'value': 'tor', 'name': 'test-1' }]) self.app.data.insert('filter_conditions', [{ '_id': 2, 'field': 'urgency', 'operator': 'in', 'value': '2', 'name': 'test-2' }]) self.app.data.insert('filter_conditions', [{ '_id': 3, 'field': 'urgency', 'operator': 'in', 'value': '3,4,5', 'name': 'test-2' }]) self.app.data.insert('filter_conditions', [{ '_id': 4, 'field': 'urgency', 'operator': 'nin', 'value': '1,2,3', 'name': 'test-2' }]) self.app.data.insert('filter_conditions', [{ '_id': 5, 'field': 'urgency', 'operator': 'in', 'value': '2,5', 'name': 'test-2' }]) self.app.data.insert( 'content_filters', [{ "_id": 1, "content_filter": [{ "expression": { "fc": [1] } }], "name": "soccer-only" }])
def init_data(self): self.users = [{'_id': '1', 'username': '******'}] self.desks = [{ '_id': ObjectId('123456789ABCDEF123456789'), 'name': 'desk1' }] self.products = [{ "_id": "1", "name": "prod1", "geo_restrictions": "NSW", "email": "*****@*****.**" }, { "_id": "2", "name": "prod2", "codes": "abc,def," }, { "_id": "3", "name": "prod3", "codes": "xyz" }] self.subscribers = [{ "_id": "1", "name": "sub1", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.WIRE, "media_type": "media", "sequence_num_settings": { "max": 10, "min": 1 }, "email": "*****@*****.**", "products": ["1"], "destinations": [{ "name": "dest1", "format": "nitf", "delivery_type": "ftp", "config": { "address": "127.0.0.1", "username": "******" } }] }, { "_id": "2", "name": "sub2", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.WIRE, "media_type": "media", "sequence_num_settings": { "max": 10, "min": 1 }, "email": "*****@*****.**", "products": ["1"], "destinations": [{ "name": "dest2", "format": "nitf", "delivery_type": "filecopy", "config": { "address": "/share/copy" } }, { "name": "dest3", "format": "nitf", "delivery_type": "Email", "config": { "recipients": "*****@*****.**" } }] }, { "_id": "3", "name": "sub3", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.DIGITAL, "media_type": "media", "sequence_num_settings": { "max": 10, "min": 1 }, "email": "*****@*****.**", "products": ["1"], "destinations": [{ "name": "dest1", "format": "nitf", "delivery_type": "ftp", "config": { "address": "127.0.0.1", "username": "******" } }] }, { "_id": "4", "name": "sub4", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.WIRE, "media_type": "media", "sequence_num_settings": { "max": 10, "min": 1 }, "products": ["1"], "destinations": [{ "name": "dest1", "format": "nitf", "delivery_type": "ftp", "config": { "address": "127.0.0.1", "username": "******" } }] }, { "_id": "5", "name": "sub5", "is_active": True, "subscriber_type": SUBSCRIBER_TYPES.ALL, "media_type": "media", "sequence_num_settings": { "max": 10, "min": 1 }, "email": "*****@*****.**", "codes": "xyz, klm", "products": ["1", "2"], "destinations": [{ "name": "dest1", "format": "ninjs", "delivery_type": "ftp", "config": { "address": "127.0.0.1", "username": "******" } }] }] self.articles = [{ 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9', '_id': '1', ITEM_TYPE: CONTENT_TYPE.TEXT, 'last_version': 3, config.VERSION: 4, 'body_html': 'Test body', 'anpa_category': [{ 'qcode': 'A', 'name': 'Sport' }], 'urgency': 4, 'headline': 'Two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article', 'dateline': { 'located': { 'city': 'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_STATE: CONTENT_STATE.PUBLISHED, 'expiry': utcnow() + timedelta(minutes=20), 'slugline': 'story slugline', 'unique_name': '#1' }, { 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a974-xy4532fe33f9', '_id': '2', 'last_version': 3, config.VERSION: 4, 'body_html': 'Test body of the second article', 'slugline': 'story slugline', 'urgency': 4, 'anpa_category': [{ 'qcode': 'A', 'name': 'Sport' }], 'headline': 'Another two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article', 'dateline': { 'located': { 'city': 
'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], 'expiry': utcnow() + timedelta(minutes=20), 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_STATE: CONTENT_STATE.PROGRESS, 'publish_schedule': "2016-05-30T10:00:00+0000", ITEM_TYPE: CONTENT_TYPE.TEXT, 'unique_name': '#2' }, { 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4fa', '_id': '3', 'last_version': 3, config.VERSION: 4, 'body_html': 'Test body', 'slugline': 'story slugline', 'urgency': 4, 'anpa_category': [{ 'qcode': 'A', 'name': 'Sport' }], 'headline': 'Two students missing killed', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article killed', 'dateline': { 'located': { 'city': 'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_STATE: CONTENT_STATE.KILLED, 'expiry': utcnow() + timedelta(minutes=20), ITEM_TYPE: CONTENT_TYPE.TEXT, 'unique_name': '#3' }, { 'guid': '8', '_id': '8', 'last_version': 3, config.VERSION: 4, 'target_regions': [{ 'qcode': 'NSW', 'name': 'New South Wales', 'allow': True }], 'body_html': 'Take-1 body', 'urgency': 4, 'headline': 'Take-1 headline', 'abstract': 'Abstract for take-1', 'anpa_category': [{ 'qcode': 'A', 'name': 'Sport' }], 'pubstatus': 'done', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'dateline': { 'located': { 'city': 'Sydney' } }, 'slugline': 'taking takes', 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_STATE: CONTENT_STATE.PROGRESS, 'expiry': utcnow() + timedelta(minutes=20), ITEM_TYPE: CONTENT_TYPE.TEXT, 'unique_name': '#8' }, { '_id': '9', 'urgency': 3, 'headline': 'creator', 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_STATE: CONTENT_STATE.FETCHED }, { 'guid': 'tag:localhost:2015:69b961ab-a7b402fed4fb', '_id': 'test_item_9', 'last_version': 3, config.VERSION: 4, 'body_html': 'Student Crime. Police Missing.', 'urgency': 4, 'headline': 'Police Missing', 'abstract': 'Police Missing', 'anpa_category': [{ 'qcode': 'A', 'name': 'Australian General News' }], 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'dateline': { 'located': { 'city': 'Sydney' } }, 'slugline': 'Police Missing', 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_STATE: CONTENT_STATE.PROGRESS, ITEM_TYPE: CONTENT_TYPE.TEXT, 'unique_name': '#9' }, { 'guid': 'tag:localhost:10:10:10:2015:69b961ab-2816-4b8a-a584-a7b402fed4fc', '_id': '100', config.VERSION: 3, 'task': { 'user': '******', 'desk': '123456789ABCDEF123456789' }, ITEM_TYPE: CONTENT_TYPE.COMPOSITE, 'groups': [{ 'id': 'root', 'refs': [{ 'idRef': 'main' }], 'role': 'grpRole:NEP' }, { 'id': 'main', 'refs': [{ 'location': ARCHIVE, ITEM_TYPE: CONTENT_TYPE.COMPOSITE, RESIDREF: '6' }], 'role': 'grpRole:main' }], 'firstcreated': utcnow(), 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#100', ITEM_STATE: CONTENT_STATE.PROGRESS }]
class AsiaNetFeedParserTestCase(TestCase): filename = 'asianet_{}.tst' year = utcnow().year headers = [{ 'headline': 'Media Release: Digital Turbine, Inc.', 'anpa_take_key': 'Digital Turbine, Inc.', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67276 Digital Turbine Partners with' }, { 'headline': 'Media Release: Queen Elizabeth Prize', 'anpa_take_key': 'Queen Elizabeth Prize', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67254 Queen Elizabeth Prize' }, { 'headline': 'Media Release: Escola Aguia de Ouro', 'anpa_take_key': 'Escola Aguia de Ouro', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67255 Animal rights come to Brazil' }, { 'headline': 'Media Release: Essence', 'anpa_take_key': 'Essence', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67257 Digital Agency Essence Builds on Enormous Growth' }, { 'headline': 'Media Release: OMRON Corporation', 'anpa_take_key': 'OMRON Corporation', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67261 OMRON Launches Promotional Website for AI-equipped' }, { 'headline': 'Media Release: OnApp', 'anpa_take_key': 'OnApp', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67266 OnApp v5.3 Simplifies Add-on Services' }, { 'headline': 'Media Release: Shinetech Software Inc.', 'anpa_take_key': 'Shinetech Software Inc.', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67271 Shinetech Software, Inc. Reports 16% Growth in 2016' }, { 'headline': 'Media Release: Huntsman Family Investments', 'anpa_take_key': 'Huntsman Family Investme', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67275 Huntsman Family Investments to Acquire GTA TeleGuam' }, { 'headline': 'Media Release: Neovia Oncology Ltd', 'anpa_take_key': 'Neovia Oncology Ltd', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR67278 Neovia Enrolls First Patient in Cancer Trial' }, { 'headline': 'Media Release: IndiGrid', 'anpa_take_key': 'IndiGrid', 'original_source': 'AsiaNet', 'first_line': '<p>MEDIA RELEASE PR74541 IndiGrid Delivers Another Strong Quarter' }] def setUp(self): self.provider = {'name': 'Test'} self.maxDiff = None def test_can_parse(self): for i in range(1, 11): self.assertTrue(AsiaNetFeedParser().can_parse( self._get_fixture(i))) def test_feed_parser(self): test_keys = ['headline', 'anpa_take_key', 'original_source'] for i in range(1, 11): item = AsiaNetFeedParser().parse(self._get_fixture(i), self.provider) expected = self.headers[i - 1] for key in test_keys: self.assertEqual(item[key], expected[key]) self.assertGreater(item['word_count'], 0) # This tests for the body content, as well as HTML escaping self.assertTrue(item['body_html'].startswith( expected['first_line'])) def _get_fixture(self, index): dirname = os.path.dirname(os.path.realpath(__file__)) return os.path.normpath( os.path.join(dirname, '../fixtures', self.filename.format(index)))
def _get_events_date_filters(self, request): """Get date filters for events resource :param request: object representing the HTTP request """ params = request.args or MultiDict() date_filter_param, start_date, end_date = self._parse_date_params(params) date_filter_param = (date_filter_param or '').lower() if not (date_filter_param or start_date or end_date): return { 'range': { 'dates.end': { 'gte': 'now/d', 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) } } } start_of_week = self._get_start_of_week(params) date_filters = [] def get_pre_defined_date_filter(start, end): filterList = list() filterList.append({ 'range': { 'dates.start': { 'gte': start, 'lt': end, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) } } }) filterList.append({ 'range': { 'dates.end': { 'gte': start, 'lt': end, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) } } }) filterList.append({ 'and': { 'filters': [ { 'range': { 'dates.start': { 'lt': start, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, { 'range': { 'dates.end': { 'gt': end, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, ], }, }) return filterList if date_filter_param == 'today': date_filters = get_pre_defined_date_filter('now/d', 'now+24h/d') elif date_filter_param == 'tomorrow': date_filters = get_pre_defined_date_filter('now+24h/d', 'now+48h/d') elif date_filter_param == 'this_week': end_of_this_week = get_start_of_next_week(None, start_of_week) start_of_this_week = end_of_this_week - timedelta(days=7) date_filters = get_pre_defined_date_filter( '{}||/d'.format(start_of_this_week.strftime(config.ELASTIC_DATE_FORMAT)), '{}||/d'.format(end_of_this_week.strftime(config.ELASTIC_DATE_FORMAT)) ) elif date_filter_param == 'next_week': start_of_next_week = get_start_of_next_week(None, start_of_week) end_of_next_week = start_of_next_week + timedelta(days=7) date_filters = get_pre_defined_date_filter( '{}||/d'.format(start_of_next_week.strftime(config.ELASTIC_DATE_FORMAT)), '{}||/d'.format(end_of_next_week.strftime(config.ELASTIC_DATE_FORMAT)) ) else: if start_date and not end_date: date_filters.extend([ { 'range': { 'dates.start': { 'gte': start_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, { 'range': { 'dates.end': { 'gte': start_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, } ]) elif not start_date and end_date: date_filters.extend([ { 'range': { 'dates.end': { 'lte': end_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, { 'range': { 'dates.start': { 'lte': end_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, } ]) else: date_filters.extend([ { 'and': { 'filters': [ { 'range': { 'dates.start': { 'gte': start_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, { 'range': { 'dates.end': { 'lte': end_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, ], }, }, { 'and': { 'filters': [ { 'range': { 'dates.start': { 'lt': start_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, { 'range': { 'dates.end': { 'gt': end_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, ], }, }, { 'or': { 'filters': [ { 'range': { 'dates.start': { 'gte': start_date, 'lte': end_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, { 'range': { 'dates.end': { 'gte': start_date, 'lte': end_date, 'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) }, }, }, ], }, } ]) return { 'or': { 'filters': date_filters } }
def test_products(self): self.app.data.insert( 'content_filters', [{ "_id": 3, "content_filter": [{ "expression": { "pf": [1], "fc": [2] } }], "name": "soccer-only3" }]) self.app.data.insert('filter_conditions', [{ '_id': 1, 'field': 'headline', 'operator': 'like', 'value': 'test', 'name': 'test-1' }]) self.app.data.insert('filter_conditions', [{ '_id': 2, 'field': 'urgency', 'operator': 'in', 'value': '2', 'name': 'test-2' }]) self.app.data.insert('products', [{ "_id": 1, "content_filter": { "filter_id": 3, "filter_type": "permitting" }, "name": "p-1", "product_type": "api" }]) self.app.data.insert('vocabularies', [{ "_id": "locators", "display_name": "Locators", "type": "unmanageable", "unique_field": "qcode", "items": [ { "is_active": True, "name": "NSW", "qcode": "NSW", "state": "New South Wales", "country": "Australia", "world_region": "Oceania", "group": "Australia" }, ], }]) embargo_ts = (utcnow() + timedelta(days=2)) article = { '_id': 'tag:aap.com.au:20150613:12345', 'guid': 'tag:aap.com.au:20150613:12345', '_current_version': 1, 'anpa_category': [{ 'qcode': 'a' }], 'source': 'AAP', 'headline': 'This is a test headline', 'byline': 'joe', 'slugline': 'slugline', 'subject': [{ 'qcode': '02011001', 'name': 'international court or tribunal', 'parent': None }, { 'qcode': '02011002', 'name': 'extradition' }], 'anpa_take_key': 'take_key', 'unique_id': '1', 'body_html': 'The story body', 'type': 'text', 'word_count': '1', 'priority': 1, 'profile': 'snap', 'state': 'published', 'urgency': 2, 'pubstatus': 'usable', 'creditline': 'sample creditline', 'keywords': ['traffic'], 'abstract': '<p>sample <b>abstract</b></p>', 'place': [{ 'name': 'NSW', 'qcode': 'NSW' }], 'embargo': embargo_ts, 'body_footer': '<p>call helpline 999 if you are planning to quit smoking</p>', 'company_codes': [{ 'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL', 'security_exchange': 'ASX' }], 'genre': [{ 'name': 'Article', 'qcode': 'article' }], 'flags': { 'marked_for_legal': True }, 'extra': { 'foo': 'test' }, 'operation': 'publish' } seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0] expected = { "guid": "tag:aap.com.au:20150613:12345", "version": "1", "place": [{ "code": "NSW", "name": "New South Wales" }], "pubstatus": "usable", "body_html": "The story body<p>call helpline 999 if you are planning to quit smoking</p>", "type": "text", "subject": [{ "code": "02011001", "name": "international court or tribunal" }, { "code": "02011002", "name": "extradition" }], "service": [{ "code": "a" }], "source": "AAP", "headline": "This is a test headline", "byline": "joe", "urgency": 2, "priority": 1, "embargoed": embargo_ts.isoformat(), "profile": "snap", "slugline": "slugline", "description_text": "sample abstract", "description_html": "<p>sample <b>abstract</b></p>", 'keywords': ['traffic'], 'organisation': [{ 'name': 'YANCOAL AUSTRALIA LIMITED', 'rel': 'Securities Identifier', 'symbols': [{ 'ticker': 'YAL', 'exchange': 'ASX' }] }], 'genre': [{ 'name': 'Article', 'code': 'article' }], 'signal': [{ 'name': 'Content Warning', 'code': 'cwarn', 'scheme': 'http://cv.iptc.org/newscodes/signal/' }], 'extra': { 'foo': 'test' }, 'charcount': 67, 'wordcount': 13, 'readtime': 0, 'products': [{ 'code': 1, 'name': 'p-1' }] } self.assertEqual(json.loads(doc), expected) article['urgency'] = 1 seq, doc = self.formatter.format(article, {'name': 'Test Subscriber'})[0] expected = { "guid": "tag:aap.com.au:20150613:12345", "version": "1", "place": [{ "code": "NSW", "name": "New South Wales" }], "pubstatus": "usable", 
"body_html": "The story body<p>call helpline 999 if you are planning to quit smoking</p>", "type": "text", "subject": [{ "code": "02011001", "name": "international court or tribunal" }, { "code": "02011002", "name": "extradition" }], "service": [{ "code": "a" }], "source": "AAP", "headline": "This is a test headline", "byline": "joe", "urgency": 1, "priority": 1, "embargoed": embargo_ts.isoformat(), "profile": "snap", "slugline": "slugline", "description_text": "sample abstract", "description_html": "<p>sample <b>abstract</b></p>", 'keywords': ['traffic'], 'organisation': [{ 'name': 'YANCOAL AUSTRALIA LIMITED', 'rel': 'Securities Identifier', 'symbols': [{ 'ticker': 'YAL', 'exchange': 'ASX' }] }], 'genre': [{ 'name': 'Article', 'code': 'article' }], 'signal': [{ 'name': 'Content Warning', 'code': 'cwarn', 'scheme': 'http://cv.iptc.org/newscodes/signal/' }], 'extra': { 'foo': 'test' }, 'charcount': 67, 'wordcount': 13, 'readtime': 0, 'products': [] } self.assertEqual(json.loads(doc), expected)
def test_filter_expired_items(self): provider, provider_service = self.setup_reuters_provider() items = provider_service.fetch_ingest(reuters_guid) for item in items[:4]: item['expiry'] = utcnow() + timedelta(minutes=11) self.assertEqual(4, len(ingest.filter_expired_items(provider, items)))
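# A minimal sketch of what filter_expired_items appears to check in the test
# above: keep only items whose expiry is still in the future (the real helper
# also takes the provider, which this sketch ignores):
def filter_expired_items_sketch(items, now):
    return [item for item in items if item.get('expiry') and item['expiry'] > now]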
def _get_date(self, article, field): return utc_to_local(config.DEFAULT_TIMEZONE or 'UTC', article.get(field) or utcnow())
def prepopulate_data(file_name, default_user=None, directory=None): if default_user is None: # resolve at call time rather than at import time default_user = get_default_user() if not directory: directory = os.path.abspath(os.path.dirname(__file__)) placeholders = {'NOW()': date_to_str(utcnow())} users = {default_user['username']: default_user['password']} default_username = default_user['username'] file = os.path.join(directory, file_name) with open(file, 'rt', encoding='utf8') as app_prepopulation: json_data = json.load(app_prepopulation) for item in json_data: resource = item.get('resource', None) try: service = get_resource_service(resource) except KeyError: continue # resource which is not configured - ignore username = item.get('username', None) or default_username set_logged_user(username, users[username]) id_name = item.get('id_name', None) id_update = item.get('id_update', None) text = json.dumps(item.get('data', None)) text = apply_placeholders(placeholders, text) data = json.loads(text) if resource: app.data.mongo._mongotize(data, resource) if resource == 'users': users.update({data['username']: data['password']}) if id_update: id_update = apply_placeholders(placeholders, id_update) res = service.patch(ObjectId(id_update), data) if not res: raise Exception('prepopulate: failed to patch {} item {}'.format(resource, id_update)) else: try: ids = service.post([data]) except werkzeug.exceptions.Conflict: ids = [data['_id']] # data with given id is there already except superdesk.errors.SuperdeskApiError: continue # an error raised by validation - can't guess why, so ignore if not ids: raise Exception('prepopulate: failed to post {} data'.format(resource)) if id_name: placeholders[id_name] = str(ids[0]) if app.config['VERSION'] in data: number_of_versions_to_insert = data[app.config['VERSION']] doc_versions = [] if data[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]: while number_of_versions_to_insert != 0: doc_versions.append(data.copy()) number_of_versions_to_insert -= 1 else: if data[ITEM_STATE] in [CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED, CONTENT_STATE.CORRECTED]: latest_version = data.copy() doc_versions.append(latest_version) published_version = data.copy() published_version[ITEM_STATE] = CONTENT_STATE.PUBLISHED published_version[ITEM_OPERATION] = 'publish' published_version[app.config['VERSION']] = number_of_versions_to_insert - 1 doc_versions.append(published_version) number_of_versions_to_insert -= 2 elif data[ITEM_STATE] == CONTENT_STATE.PUBLISHED: published_version = data.copy() doc_versions.append(published_version) number_of_versions_to_insert -= 1 while number_of_versions_to_insert != 0: doc = data.copy() doc[ITEM_STATE] = CONTENT_STATE.PROGRESS doc.pop(ITEM_OPERATION, '') doc[app.config['VERSION']] = number_of_versions_to_insert doc_versions.append(doc) number_of_versions_to_insert -= 1 insert_versioning_documents(resource, doc_versions if doc_versions else data)
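# Shape of a prepopulate JSON file as consumed above; the field names
# ('resource', 'username', 'id_name', 'id_update', 'data') come straight from
# the code, 'NOW()' is the one placeholder seeded in the code, and the values
# are illustrative (the substitution syntax handled by apply_placeholders is
# not shown in this excerpt):
# [
#     {"resource": "desks", "id_name": "sports_desk",
#      "data": {"name": "Sports", "created": "NOW()"}},
#     {"resource": "archive", "username": "admin",
#      "data": {"headline": "test story", "state": "in_progress"}}
# ]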
def _init_article_versions(self): resource_def = self.app.config['DOMAIN']['archive_versions'] version_id = versioned_id_field(resource_def) return [{ 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9', version_id: '1', ITEM_TYPE: CONTENT_TYPE.TEXT, config.VERSION: 1, 'urgency': 4, 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'dateline': { 'located': { 'city': 'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], ITEM_STATE: CONTENT_STATE.DRAFT, 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#8' }, { 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9', version_id: '1', ITEM_TYPE: CONTENT_TYPE.TEXT, config.VERSION: 2, 'urgency': 4, 'headline': 'Two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'dateline': { 'located': { 'city': 'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], ITEM_STATE: CONTENT_STATE.SUBMITTED, 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#8' }, { 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9', version_id: '1', ITEM_TYPE: CONTENT_TYPE.TEXT, config.VERSION: 3, 'urgency': 4, 'headline': 'Two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article', 'dateline': { 'located': { 'city': 'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], ITEM_STATE: CONTENT_STATE.PROGRESS, 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#8' }, { 'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9', version_id: '1', ITEM_TYPE: CONTENT_TYPE.TEXT, config.VERSION: 4, 'body_html': 'Test body', 'urgency': 4, 'headline': 'Two students missing', 'pubstatus': 'usable', 'firstcreated': utcnow(), 'byline': 'By Alan Karben', 'ednote': 'Andrew Marwood contributed to this article', 'dateline': { 'located': { 'city': 'Sydney' } }, 'keywords': ['Student', 'Crime', 'Police', 'Missing'], 'subject': [{ 'qcode': '17004000', 'name': 'Statistics' }, { 'qcode': '04001002', 'name': 'Weather' }], ITEM_STATE: CONTENT_STATE.PROGRESS, 'expiry': utcnow() + timedelta(minutes=20), 'unique_name': '#8' }]
def format_filename(self, item): attachment_filename = '%s-monitoring-export.pdf' % utcnow().strftime( '%Y%m%d%H%M%S') return secure_filename(attachment_filename)
def test_validate_schedule(self): validate_schedule(utcnow() + timedelta(hours=2))
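# Taken together with the past-date test earlier, a minimal validate_schedule
# sketch consistent with both tests (the real implementation raises
# SuperdeskApiError and may validate more, e.g. that the value is a datetime):
def validate_schedule_sketch(schedule, now):
    if schedule <= now:
        raise ValueError('schedule cannot be in the past')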
class LegalArchiveConsistencyCheckCommand(superdesk.Command): option_list = { superdesk.Option('--input_date', '-i', dest='input_date', default=None), superdesk.Option('--days_to_process', '-d', dest='days_to_process', default=1), superdesk.Option('--page_size', '-p', dest='page_size', default=500), } default_page_size = 500 archive_ids = [] def run(self, input_date, days_to_process, page_size): lock_name = 'legal_archive:consistency' self.default_page_size = int(page_size) days_to_process = int(days_to_process) if not lock(lock_name, expire=610): logger.warning("Task: {} is already running.".format(lock_name)) return try: logger.info('Input Date: {} ---- Days to Process: {}'.format( input_date, days_to_process)) self.check_legal_archive_consistency(input_date, days_to_process) self.check_legal_archive_version_consistency() self.check_legal_archive_queue_consistency() logger.info('Completed the legal archive consistency check.') except Exception: logger.exception( "Failed to execute LegalArchiveConsistencyCheckCommand") finally: unlock(lock_name) def check_legal_archive_consistency(self, input_date, days_to_process): start_time = utcnow() start_date, end_date = self._get_date_range(input_date, days_to_process) logger.info('start_date: {} ---- end_date: {}'.format( start_date, end_date)) archive_items = self._get_archive_items(start_date, end_date) if archive_items: self.archive_ids = list(archive_items.keys()) logger.info("Found {} items in archive.".format(len(archive_items))) legal_archive_items = self._get_legal_archive_items(self.archive_ids) logger.info("Found {} items in legal archive.".format( len(legal_archive_items))) record = self.check_consistency('archive', archive_items, legal_archive_items) record['completed_at'] = utcnow() record['started_at'] = start_time get_resource_service('legal_archive_consistency').post([record]) def check_legal_archive_version_consistency(self): start_time = utcnow() if not self.archive_ids: return archive_items = self._get_archive_version_items(self.archive_ids) logger.info("Found {} items in archive versions.".format( len(archive_items))) legal_archive_items = self._get_legal_archive_version_items( self.archive_ids) logger.info("Found {} items in legal archive versions.".format( len(legal_archive_items))) record = self.check_consistency('archive_versions', archive_items, legal_archive_items) record['completed_at'] = utcnow() record['started_at'] = start_time get_resource_service('legal_archive_consistency').post([record]) def check_legal_archive_queue_consistency(self): start_time = utcnow() if not self.archive_ids: return archive_items = self._get_publish_queue_items(self.archive_ids) logger.info("Found {} items in publish queue.".format( len(archive_items))) legal_archive_items = self._get_legal_publish_queue_items( self.archive_ids) logger.info("Found {} items in legal publish queue.".format( len(legal_archive_items))) record = self.check_consistency('publish_queue', archive_items, legal_archive_items) record['completed_at'] = utcnow() record['started_at'] = start_time get_resource_service('legal_archive_consistency').post([record]) def check_consistency(self, resource, archive_items, legal_items): record = { 'resource': resource, 'archive': len(archive_items), 'legal': len(legal_items) } archive_ids = set(archive_items.keys()) legal_ids = set(legal_items.keys()) record['archive_only'] = list(archive_ids - legal_ids) diff = {} for k, v in archive_items.items(): if not compare_dictionaries(v, legal_items.get(k)): diff[k.replace('.', ':')] = { 'archive': v, 'legal': legal_items.get(k) } record['difference'] = diff record['identical'] = len(archive_items) - len(diff) return record def __get_key(self, item): return item.get(config.ID_FIELD) def __get_version_key(self, item): return '{}-{}'.format(item.get('_id_document'), item.get(config.VERSION)) def _get_items(self, resource, query, sort, keys, callback): req = ParsedRequest() cursor = get_resource_service(resource).get_from_mongo(req=req, lookup=query) count = cursor.count() no_of_buckets = len(range(0, count, self.default_page_size)) items = {} req.sort = sort for bucket in range(0, no_of_buckets): skip = bucket * self.default_page_size logger.info('Page : {}, skip: {}'.format(bucket + 1, skip)) cursor = get_resource_service(resource).get_from_mongo( req=req, lookup=query) cursor.skip(skip) cursor.limit(self.default_page_size) cursor = list(cursor) items.update({ callback(item): {key: item.get(key) for key in keys if key in item} for item in cursor }) return items def _get_archive_items(self, start_date, end_date): """ Gets the archive items from the mongo database that were updated in the given date range :return: """ query = { '$and': [{ '_updated': { '$gte': date_to_str(start_date), '$lte': date_to_str(end_date) } }, { ITEM_STATE: { '$in': [ CONTENT_STATE.CORRECTED, CONTENT_STATE.PUBLISHED, CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED ] } }] } return self._get_items(ARCHIVE, query, '_created', [config.VERSION, 'versioncreated', 'state'], self.__get_key) def _get_legal_archive_items(self, archive_ids): """ Get the legal archive items :param list archive_ids: :return dict: """ if not archive_ids: return {} query = {'$and': [{'_id': {'$in': archive_ids}}]} return self._get_items('legal_archive', query, '_created', [config.VERSION, 'versioncreated', 'state'], self.__get_key) def _get_archive_version_items(self, archive_ids): """ Get the archive version items :param list archive_ids: :return dict: """ if not archive_ids: return {} query = {'$and': [{'_id_document': {'$in': archive_ids}}]} return self._get_items('archive_versions', query, '_created', [config.VERSION, 'versioncreated', 'state'], self.__get_version_key) def _get_legal_archive_version_items(self, archive_ids): """ Get the legal archive version items :param list archive_ids: :return dict: """ if not archive_ids: return {} query = {'$and': [{'_id_document': {'$in': archive_ids}}]} return self._get_items('legal_archive_versions', query, '_created', [config.VERSION, 'versioncreated', 'state'], self.__get_version_key) def _get_publish_queue_items(self, archive_ids): """ Get the publish queue items :param list archive_ids: :return dict: """ if not archive_ids: return {} query = {'$and': [{'item_id': {'$in': archive_ids}}]} return self._get_items('publish_queue', query, '_created', [ 'published_seq_num', 'publishing_action', 'unique_name', 'item_version', 'state', 'content_type' ], self.__get_key) def _get_legal_publish_queue_items(self, archive_ids): """ Get the legal publish queue items :param list archive_ids: :return dict: """ if not archive_ids: return {} query = {'$and': [{'item_id': {'$in': archive_ids}}]} return self._get_items('legal_publish_queue', query, '_created', [ 'published_seq_num', 'publishing_action', 'unique_name', 'item_version', 'state', 'content_type' ], self.__get_key) def _get_date_range(self, input_date, days_to_process=1): """ Calculate the date range to process :param datetime input_date: :param int days_to_process: :return: """ if not input_date: input_date = utcnow() elif isinstance(input_date, str): input_date =
get_date(input_date) elif not isinstance(input_date, datetime): raise ValueError("Invalid Input Date.") end_date = input_date start_date = (end_date - timedelta(days=int(days_to_process))).replace( hour=0, minute=0, second=0, microsecond=0) return start_date, end_date
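# Usage sketch: superdesk commands are typically run through the manage
# script; the registered command name is not shown in this excerpt, so the
# one below is an assumption:
# python manage.py legal_archive:consistency_check -i 2017-01-01 -d 7 -p 500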