    def test_files_dont_duplicate_ingest(self):
        provider_name = 'reuters'
        guid = 'tag_reuters.com_2014_newsml_KBN0FL0NM:10'
        provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
        provider_service = self._get_provider_service(provider)
        provider_service.provider = provider
        provider_service.URL = provider.get('config', {}).get('url')
        items = provider_service.fetch_ingest(guid)

        for item in items:
            item['ingest_provider'] = provider['_id']
            item['expiry'] = utcnow() + timedelta(hours=11)

        # ingest the items
        self.ingest_items(items, provider, provider_service)

        items = provider_service.fetch_ingest(guid)
        for item in items:
            item['ingest_provider'] = provider['_id']
            item['expiry'] = utcnow() + timedelta(hours=11)

        # ingest them again
        self.ingest_items(items, provider, provider_service)

        # expect 12 files in GridFS
        current_files = self.app.media.fs('upload').find()
        self.assertEqual(12, current_files.count())
 def test_ingest_cancellation(self):
     provider_name = 'reuters'
     guid = 'tag_reuters.com_2016_newsml_L1N14N0FF:978556838'
     provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
     provider_service = self._get_provider_service(provider)
     provider_service.provider = provider
     provider_service.URL = provider.get('config', {}).get('url')
     items = provider_service.fetch_ingest(guid)
     for item in items:
         item['ingest_provider'] = provider['_id']
         item['expiry'] = utcnow() + timedelta(hours=11)
     self.ingest_items(items, provider, provider_service)
     guid = 'tag_reuters.com_2016_newsml_L1N14N0FF:1542761538'
     items = provider_service.fetch_ingest(guid)
     for item in items:
         item['ingest_provider'] = provider['_id']
         item['expiry'] = utcnow() + timedelta(hours=11)
     self.ingest_items(items, provider, provider_service)
     ingest_service = get_resource_service('ingest')
     lookup = {'uri': items[0].get('uri')}
     family_members = ingest_service.get_from_mongo(req=None, lookup=lookup)
     self.assertEqual(family_members.count(), 2)
     for relative in family_members:
         self.assertEqual(relative['pubstatus'], 'canceled')
         self.assertEqual(relative['state'], 'killed')
 def test_get_article_ids(self):
     provider_name = 'reuters'
     provider, provider_service = self.setup_reuters_provider()
     ids = provider_service._get_article_ids('channel1', utcnow(), utcnow() + timedelta(minutes=-10))
     self.assertEqual(len(ids), 3)
     provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
     self.assertEqual(provider['tokens']['poll_tokens']['channel1'], 'ExwaY31kfnR2Z2J1cWZ2YnxoYH9kfw==')
    def test_ingest_update(self):
        provider_name = 'reuters'
        guid = 'tag_reuters.com_2014_newsml_KBN0FL0NN:5'
        provider = get_resource_service('ingest_providers').find_one(name=provider_name, req=None)
        provider_service = self._get_provider_service(provider)
        provider_service.provider = provider
        provider_service.URL = provider.get('config', {}).get('url')
        items = provider_service.fetch_ingest(guid)
        items[0]['ingest_provider'] = provider['_id']
        items[0]['expiry'] = utcnow() + timedelta(hours=11)

        self.ingest_items(items, provider, provider_service)

        self.assertEqual(items[0]['unique_id'], 1)
        original_id = items[0]['_id']

        items = provider_service.fetch_ingest(guid)
        items[0]['ingest_provider'] = provider['_id']
        items[0]['expiry'] = utcnow() + timedelta(hours=11)
        # change the headline
        items[0]['headline'] = 'Updated headline'

        # ingest the item again
        self.ingest_items(items, provider, provider_service)

        # check that the headline update and unique_id survive
        elastic_item = self.app.data._search_backend('ingest').find_one('ingest', _id=original_id, req=None)
        self.assertEqual(elastic_item['headline'], 'Updated headline')
        self.assertEqual(elastic_item['unique_id'], 1)
        self.assertEqual(elastic_item['unique_name'], '#1')
    def test_ingest_associated_item_renditions(self):
        provider = {'feeding_service': 'ninjs', '_id': self.providers['ninjs']}
        provider_service = FileFeedingService()
        item = {
            'guid': 'foo',
            'type': 'text',
            'versioncreated': utcnow(),
            'associations': {
                'featuremedia': {
                    'guid': 'bar',
                    'type': 'picture',
                    'versioncreated': utcnow(),
                    'renditions': {
                        'original': {
                            'href': 'https://farm8.staticflickr.com/7300/9203849352_297ea4207d_z_d.jpg',
                            'mimetype': 'image/jpeg',
                            'width': 640,
                            'height': 426,
                        }
                    }
                }
            }
        }

        # avoid the transfer_renditions call, which would store the picture locally
        # and fetch it via the superdesk url, which doesn't work in tests
        with patch('superdesk.io.commands.update_ingest.transfer_renditions'):
            status, ids = ingest.ingest_item(item, provider, provider_service)

        self.assertTrue(status)
        self.assertEqual(2, len(ids))
        self.assertIn('thumbnail', item['associations']['featuremedia']['renditions'])
def filter_expired_items(provider, items):
    try:
        days_to_keep_content = provider.get('days_to_keep', DAYS_TO_KEEP)
        expiration_date = utcnow() - timedelta(days=days_to_keep_content)
        return [item for item in items if item.get('versioncreated', utcnow()) > expiration_date]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
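A minimal usage sketch for filter_expired_items, assuming the surrounding module's utcnow, timedelta and DAYS_TO_KEEP; the provider dict and guids are hypothetical:

# hypothetical provider that keeps two days of content
provider = {'days_to_keep': 2}
items = [
    {'guid': 'fresh', 'versioncreated': utcnow() - timedelta(days=1)},
    {'guid': 'stale', 'versioncreated': utcnow() - timedelta(days=5)},
]
# only items newer than the expiration date survive
assert [i['guid'] for i in filter_expired_items(provider, items)] == ['fresh']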
 def on_update(self, updates, original):
     # in the case we have a comment
     if original['post_status'] == 'comment':
         original['blog'] = original['groups'][1]['refs'][0]['item']['client_blog']
         updates['blog'] = original['groups'][1]['refs'][0]['item']['client_blog']
         # raise an error if the comment length is not between 1 and 300
         check_comment_length(original['groups'][1]['refs'][0]['item']['text'])
     # check if the updated `content` differs from the original.
     content_diff = False
     if not updates.get('groups', False):
         content_diff = False
     elif len(original['groups'][1]['refs']) != len(updates['groups'][1]['refs']):
         content_diff = True
     else:
         for index, val in enumerate(updates['groups'][1]['refs']):
             item = get_resource_service('archive').find_one(req=None, _id=val['residRef'])
             if item['text'] != original['groups'][1]['refs'][index]['item']['text']:
                 content_diff = True
                 break
     if content_diff:
         updates['content_updated_date'] = utcnow()
     # check permission
     post = original.copy()
     post.update(updates)
     self.check_post_permission(post)
     # when publishing, put the published item from drafts and contributions at the top of the timeline
     if updates.get('post_status') == 'open' and original.get('post_status') in ('draft', 'submitted', 'comment'):
         updates['order'] = self.get_next_order_sequence(original.get('blog'))
         # publishing a post records the published date and the user who published it
         updates['published_date'] = utcnow()
         updates['publisher'] = getattr(flask.g, 'user', None)
     # when unpublishing
     if original.get('post_status') == 'open' and updates.get('post_status') != 'open':
         updates['unpublished_date'] = utcnow()
     super().on_update(updates, original)
    def test_ingest_update(self):
        provider, provider_service = self.setup_reuters_provider()
        items = provider_service.fetch_ingest(reuters_guid)
        items[0]['ingest_provider'] = provider['_id']
        items[0]['expiry'] = utcnow() + timedelta(hours=11)

        self.ingest_items(items, provider, provider_service)

        self.assertEqual(items[0]['unique_id'], 1)
        original_id = items[0]['_id']

        items = provider_service.fetch_ingest(reuters_guid)
        items[0]['ingest_provider'] = provider['_id']
        items[0]['expiry'] = utcnow() + timedelta(hours=11)
        # change the headline
        items[0]['headline'] = 'Updated headline'

        # ingest the item again
        self.ingest_items(items, provider, provider_service)

        # check that the headline update and unique_id survive
        elastic_item = self.app.data._search_backend('ingest').find_one('ingest', _id=original_id, req=None)
        self.assertEqual(elastic_item['headline'], 'Updated headline')
        self.assertEqual(elastic_item['unique_id'], 1)
        self.assertEqual(elastic_item['unique_name'], '#1')
    def test_locator(self):
        article = {
            'source': 'AAP',
            'anpa_category': [{'qcode': 's'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '15017000'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'preformatted',
            'body_html': 'The story body',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId(),
            'place': [{'qcode': 'VIC', 'name': 'VIC'}]
        }

        subscriber = self.app.data.find('subscribers', None, None)[0]
        f = AAPBulletinBuilderFormatter()
        seq, item = f.format(article, subscriber)[0]
        self.assertGreater(int(seq), 0)
        test_article = json.loads(item)
        self.assertEqual(test_article['headline'], 'This is a test headline')
        self.assertEqual(test_article['place'][0]['qcode'], 'CRIK')
        article['anpa_category'] = [{'qcode': 'a'}]
        article['place'] = [{'qcode': 'VIC', 'name': 'VIC'}]
        seq, item = f.format(article, subscriber)[0]
        self.assertGreater(int(seq), 0)
        test_article = json.loads(item)
        self.assertEqual(test_article['headline'], 'This is a test headline')
        self.assertEqual(test_article['place'][0]['qcode'], 'VIC')
    def test_new_zealand_content_with_source_NZN(self):
        article = {
            'source': 'NZN',
            'anpa_category': [{'qcode': 's'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '15017000'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'format': 'preserved',
            'body_html': 'The story body',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId(),
            'task': {
                'desk': self.desks[1][config.ID_FIELD]
            }
        }

        subscriber = self.app.data.find('subscribers', None, None)[0]
        seq, item = self._formatter.format(article, subscriber)[0]
        item = json.loads(item)
        self.assertGreater(int(seq), 0)
        test_article = json.loads(item.get('data'))
        self.assertEqual(test_article['source'], 'NZN')
 def setUp(self):
     try:
         from superdesk.publish.publish_content import get_queue_items
     except ImportError:
         self.fail("Could not import function under test (transmit_items).")
     else:
         self.func_under_test = get_queue_items
         self.queue_items = [
             {'_id': ObjectId(), 'state': 'pending', 'item_id': 'item_1', 'item_version': 4,
              'headline': 'pending headline', 'destination': {}},
             {'_id': ObjectId(), 'state': 'retrying', 'item_id': 'item_2', 'item_version': 4,
              'headline': 'retrying headline', 'retry_attempt': 2,
              'next_retry_attempt_at': utcnow() + timedelta(minutes=30)},
             {'_id': ObjectId(), 'state': 'success', 'item_id': 'item_3', 'item_version': 4,
              'headline': 'success headline', 'retry_attempt': 4,
              'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
             {'_id': ObjectId(), 'state': 'failed', 'item_id': 'item_4', 'item_version': 4,
              'headline': 'failed headline', 'retry_attempt': 10,
              'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
             {'_id': ObjectId(), 'state': 'canceled', 'item_id': 'item_5', 'item_version': 4,
              'headline': 'canceled headline', 'retry_attempt': 4,
              'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
             {'_id': ObjectId(), 'state': 'retrying', 'item_id': 'item_6', 'item_version': 4,
              'headline': 'retrying headline', 'retry_attempt': 2,
              'next_retry_attempt_at': utcnow() + timedelta(minutes=-30)},
         ]
         self.app.data.insert('publish_queue', self.queue_items)
    def test_single_category_allow_features(self):
        article = {
            'source': 'AAP',
            'anpa_category': [{'qcode': 'c'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '15017000'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'format': 'HTML',
            'body_html': 'The story body',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId(),
            'task': {
                'desk': self.desks[0][config.ID_FIELD]
            }
        }

        subscriber = self.app.data.find('subscribers', None, None)[0]
        seq, item = self._formatter.format(article, subscriber)[0]
        item = json.loads(item)
        self.assertGreater(int(seq), 0)
        test_article = json.loads(item.get('data'))
        self.assertEqual(test_article['source'], 'AAP')
        self.assertEqual(test_article['first_category']['qcode'], 'c')
        self.assertEqual(len(test_article['anpa_category']), 1)
        self.assertEqual(test_article['anpa_category'][0]['qcode'], 'c')
    def test_bulletin_builder_formatter(self):
        article = {
            config.ID_FIELD: '123',
            config.VERSION: 2,
            'source': 'AAP',
            'anpa_category': [{'qcode': 'a'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'preformatted',
            'body_html': 'The story body',
            'abstract': 'abstract',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId()
        }

        subscriber = self.app.data.find('subscribers', None, None)[0]
        seq, item = self._formatter.format(article, subscriber)[0]
        item = json.loads(item)
        self.assertGreater(int(seq), 0)
        self.assertEqual(article[config.ID_FIELD], item.get('id'))
        self.assertEqual(article[config.VERSION], item.get('version'))
        self.assertEqual(article[ITEM_TYPE], item.get(ITEM_TYPE))
        self.assertEqual(article.get(PACKAGE_TYPE, ''), item.get(PACKAGE_TYPE))
        self.assertEqual(article['headline'], item.get('headline'))
        self.assertEqual(article['slugline'], item.get('slugline'))
        formatted_item = json.loads(item.get('data'))
        self.assertEqual(article['headline'], formatted_item['headline'])
    def test_auto_publish_without_abstract_other_source(self):
        article = {
            'source': 'AAP',
            'anpa_category': [{'qcode': 'c'}],
            'headline': 'This is a test headline',
            'auto_publish': True,
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '15017000'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'text',
            'format': 'HTML',
            'body_html': 'Sydney, AAP - The story body text.',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId(),
            'task': {
                'desk': self.desks[0][config.ID_FIELD]
            }
        }

        subscriber = self.app.data.find('subscribers', None, None)[0]
        seq, item = self._formatter.format(article, subscriber)[0]
        item = json.loads(item)
        self.assertGreater(int(seq), 0)
        test_article = json.loads(item.get('data'))
        self.assertEqual(test_article['source'], 'AAP')
        self.assertEqual(test_article['abstract'], 'This is a test headline')
        self.assertEqual(test_article['slugline'], 'slugline')
        self.assertEqual(test_article['body_text'], 'Sydney, AAP - The story body text.')
    def test_body_footer(self):
        article = {
            'source': 'AAP',
            'anpa_category': [{'qcode': 's'}],
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '15017000'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'preformatted',
            'body_html': 'The story body',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId(),
            'body_footer': 'call helpline 999 if you are planning to quit smoking'
        }

        subscriber = self.app.data.find('subscribers', None, None)[0]
        seq, item = self._formatter.format(article, subscriber)[0]

        formatted_article = json.loads(item)
        self.assertEqual(formatted_article['body_text'],
                         'The story body call helpline 999 if you are planning to quit smoking')
def update_provider(provider, rule_set=None, routing_scheme=None):
    """
    Fetches items from ingest provider as per the configuration, ingests them into Superdesk and
    updates the provider.
    """
    if ingest_for_provider_is_already_running(provider):
        return

    try:
        update = {
            LAST_UPDATED: utcnow()
        }

        for items in providers[provider.get('type')].update(provider):
            ingest_items(items, provider, rule_set, routing_scheme)
            stats.incr('ingest.ingested_items', len(items))
            if items:
                update[LAST_ITEM_UPDATE] = utcnow()
        ingest_service = superdesk.get_resource_service('ingest_providers')
        ingest_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=ingest_service._get_administrators(),
                name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))
        push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    finally:
        mark_provider_as_not_running(provider)
    def test_bulletin_builder_formatter(self):
        article = {
            'source': 'AAP',
            'anpa-category': {'qcode': 'a'},
            'headline': 'This is a test headline',
            'byline': 'joe',
            'slugline': 'slugline',
            'subject': [{'qcode': '02011001'}],
            'anpa_take_key': 'take_key',
            'unique_id': '1',
            'type': 'preformatted',
            'body_html': 'The story body',
            'word_count': '1',
            'priority': '1',
            'firstcreated': utcnow(),
            'versioncreated': utcnow(),
            'lock_user': ObjectId()
        }

        with self.app.app_context():
            subscriber = self.app.data.find('subscribers', None, None)[0]
            f = AAPBulletinBuilderFormatter()
            seq, item = f.format(article, subscriber)[0]
            self.assertGreater(int(seq), 0)
            self.assertEqual(json.dumps(article, default=json_serialize_datetime_objectId), item)
    def setUp(self):
        self.req = ParsedRequest()
        with self.app.test_request_context(self.app.config.get('URL_PREFIX')):
            self.articles = [{'_id': '1', 'urgency': 1, 'headline': 'story', 'state': 'fetched'},
                             {'_id': '2', 'headline': 'prtorque', 'state': 'fetched'},
                             {'_id': '3', 'urgency': 3, 'state': 'fetched', 'flags': {'marked_for_sms': True}},
                             {'_id': '4', 'urgency': 4, 'state': 'fetched', 'task': {'desk': '1'},
                              'ingest_provider': '1'},
                             {'_id': '5', 'urgency': 2, 'state': 'fetched', 'task': {'desk': '2'}, 'priority': 3},
                             {'_id': '6', 'state': 'fetched', 'embargo': utcnow(),
                             'schedule_settings': {'utc_embargo': utcnow() + timedelta(minutes=20)}},
                             {'_id': '7', 'genre': [{'name': 'Sidebar'}], 'state': 'fetched'},
                             {'_id': '8', 'subject': [{'name': 'adult education',
                                                       'qcode': '05001000',
                                                       'parent': '05000000'},
                                                      {'name': 'high schools',
                                                       'qcode': '05005003',
                                                       'parent': '05005000'}], 'state': 'fetched'},
                             {'_id': '9', 'state': 'fetched', 'anpa_category':
                                 [{'qcode': 'a', 'name': 'Aus News'}]},
                             {'_id': '10', 'body_html': '<p>Mention<p>', 'embargo': utcnow(),
                             'schedule_settings': {'utc_embargo': utcnow() - timedelta(minutes=20)}},
                             {'_id': '11', 'place': [{'qcode': 'NSW', 'name': 'NSW'}], 'state': 'fetched'}]

            self.app.data.insert('archive', self.articles)

            self.app.data.insert('filter_conditions',
                                 [{'_id': 1,
                                   'field': 'headline',
                                   'operator': 'like',
                                   'value': 'tor',
                                   'name': 'test-1'}])
            self.app.data.insert('filter_conditions',
                                 [{'_id': 2,
                                   'field': 'urgency',
                                   'operator': 'in',
                                   'value': '2',
                                   'name': 'test-2'}])
            self.app.data.insert('filter_conditions',
                                 [{'_id': 3,
                                   'field': 'urgency',
                                   'operator': 'in',
                                   'value': '3,4,5',
                                   'name': 'test-2'}])
            self.app.data.insert('filter_conditions',
                                 [{'_id': 4,
                                   'field': 'urgency',
                                   'operator': 'nin',
                                   'value': '1,2,3',
                                   'name': 'test-2'}])
            self.app.data.insert('filter_conditions',
                                 [{'_id': 5,
                                   'field': 'urgency',
                                   'operator': 'in',
                                   'value': '2,5',
                                   'name': 'test-2'}])
            self.app.data.insert('content_filters',
                                 [{"_id": 1,
                                   "content_filter": [{"expression": {"fc": [1]}}],
                                   "name": "soccer-only"}])
def transmit_subscriber_items(self, queue_items, subscriber):
    # Attempt to obtain a lock for transmissions to the subscriber
    lock_name = get_lock_id("Subscriber", "Transmit", subscriber)

    if not lock(lock_name, expire=610):
        return

    for queue_item in queue_items:
        publish_queue_service = get_resource_service(PUBLISH_QUEUE)
        log_msg = (
            "_id: {_id}  item_id: {item_id}  state: {state} "
            "item_version: {item_version} headline: {headline}".format(**queue_item)
        )
        try:
            # check the status of the queue item
            queue_item = publish_queue_service.find_one(req=None, _id=queue_item[config.ID_FIELD])
            if queue_item.get("state") not in [QueueState.PENDING.value, QueueState.RETRYING.value]:
                logger.info(
                    "Transmit State is not pending/retrying for queue item: {}. It is in {}".format(
                        queue_item.get(config.ID_FIELD), queue_item.get("state")
                    )
                )
                continue

            # update the status of the item to in-progress
            queue_update = {"state": "in-progress", "transmit_started_at": utcnow()}
            publish_queue_service.patch(queue_item.get(config.ID_FIELD), queue_update)
            logger.info("Transmitting queue item {}".format(log_msg))

            destination = queue_item["destination"]
            transmitter = superdesk.publish.registered_transmitters[destination.get("delivery_type")]
            transmitter.transmit(queue_item)
            logger.info("Transmitted queue item {}".format(log_msg))
        except Exception as e:
            logger.exception("Failed to transmit queue item {}".format(log_msg))

            max_retry_attempt = app.config.get("MAX_TRANSMIT_RETRY_ATTEMPT")
            retry_attempt_delay = app.config.get("TRANSMIT_RETRY_ATTEMPT_DELAY_MINUTES")
            try:
                orig_item = publish_queue_service.find_one(req=None, _id=queue_item["_id"])
                updates = {config.LAST_UPDATED: utcnow()}

                if orig_item.get("retry_attempt", 0) < max_retry_attempt and not isinstance(
                    e, PublishHTTPPushClientError
                ):

                    updates["retry_attempt"] = orig_item.get("retry_attempt", 0) + 1
                    updates["state"] = QueueState.RETRYING.value
                    updates["next_retry_attempt_at"] = utcnow() + timedelta(minutes=retry_attempt_delay)
                else:
                    # all retry attempts exhausted; mark the item as failed.
                    updates["state"] = QueueState.FAILED.value

                publish_queue_service.system_update(orig_item.get(config.ID_FIELD), updates, orig_item)
            except Exception:
                logger.error("Failed to set the state for failed publish queue item {}.".format(queue_item["_id"]))

    # Release the lock for the subscriber
    unlock(lock_name)
    def update_times(self, doc):
        task = doc.get('task', {})
        status = task.get('status', None)
        if status == 'in_progress':
            task.setdefault('started_at', utcnow())

        if status == 'done':
            task.setdefault('finished_at', utcnow())
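A small sketch of the setdefault behaviour above: the timestamp is written only on the first transition into a status and preserved on later calls (the service instance is hypothetical):

doc = {'task': {'status': 'in_progress'}}
service.update_times(doc)  # hypothetical service exposing update_times
started = doc['task']['started_at']
service.update_times(doc)  # a second call does not overwrite the timestamp
assert doc['task']['started_at'] == started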
    def test_remove_published_and_killed_content_separately(self):
        doc = self.articles[0]
        original = doc.copy()

        updates = {'targeted_for': [{'name': 'New South Wales', 'allow': True}]}
        get_resource_service(ARCHIVE).patch(id=original[config.ID_FIELD], updates=updates)

        original.update(updates)
        self._create_and_insert_into_versions(original, False)

        published_version_number = original[config.VERSION] + 1
        get_resource_service(ARCHIVE_PUBLISH).patch(id=doc[config.ID_FIELD],
                                                    updates={ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                                             config.VERSION: published_version_number})

        published_service = get_resource_service(PUBLISHED)
        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.PUBLISHED)
        self.assertEqual(article_in_production[config.VERSION], published_version_number)
        insert_into_versions(doc=article_in_production)

        # Setting the expiry date of the published article to 1 hr back from now
        published_service.update_published_items(
            original[config.ID_FIELD], 'expiry', utcnow() + timedelta(minutes=-60))

        # Kill the published article and insert into archive_versions, as unit tests use the service directly
        published_version_number += 1
        get_resource_service(ARCHIVE_KILL).patch(id=doc[config.ID_FIELD],
                                                 updates={ITEM_STATE: CONTENT_STATE.KILLED,
                                                          config.VERSION: published_version_number})

        # Executing the Expiry Job for the Published Article and asserting the collections
        RemoveExpiredPublishContent().run()

        published_items = published_service.get(req=None, lookup=None)
        self.assertEqual(1, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        self.assertIsNotNone(article_in_production)
        self.assertEqual(article_in_production[ITEM_STATE], CONTENT_STATE.KILLED)
        self.assertEqual(article_in_production[config.VERSION], published_version_number)
        insert_into_versions(doc=article_in_production)

        # Setting the expiry date of the killed article to 1 hr back from now and running the job again
        published_service.update_published_items(
            original[config.ID_FIELD], 'expiry', utcnow() + timedelta(minutes=-60))
        RemoveExpiredPublishContent().run()

        published_items = published_service.get_other_published_items(str(original[config.ID_FIELD]))
        self.assertEqual(0, published_items.count())

        article_in_production = get_resource_service(ARCHIVE).find_one(req=None, _id=original[config.ID_FIELD])
        self.assertIsNone(article_in_production)
 def run(self, provider=None):
     if provider:
         data = superdesk.json.loads(provider)
         data.setdefault('_created', utcnow())
         data.setdefault('_updated', utcnow())
         data.setdefault('name', data['type'])
         db = superdesk.get_db()
         db['ingest_providers'].save(data)
         return data
 def on_update(self, updates, original):
     # put the published item from drafts at the top of the timeline
     if updates.get('post_status') == 'open' and original.get('post_status') == 'draft':
         updates['order'] = self.get_next_order_sequence()
         # if you publish a post from a draft, only then will it have a published_date assigned
         updates['published_date'] = utcnow()
     if original.get('post_status') == 'open' and updates.get('post_status') == 'draft':
         updates['unpublished_date'] = utcnow()
     super().on_update(updates, original)
def enqueue_item(published_item):
    """
    Creates the corresponding entries in the publish queue for the given item
    """
    published_item_id = ObjectId(published_item[config.ID_FIELD])
    published_service = get_resource_service(PUBLISHED)
    archive_service = get_resource_service(ARCHIVE)
    published_update = {QUEUE_STATE: PUBLISH_STATE.IN_PROGRESS, 'last_queue_event': utcnow()}
    try:
        logger.info('Queueing item with id: {} and item_id: {}'.format(published_item_id, published_item['item_id']))

        published_item = published_service.find_one(req=None, _id=published_item_id)
        if published_item.get(QUEUE_STATE) != PUBLISH_STATE.PENDING:
            logger.info('Queue State is not pending for published item {}. It is in {}'.
                        format(published_item_id, published_item.get(QUEUE_STATE)))
            return

        if published_item.get(ITEM_STATE) == CONTENT_STATE.SCHEDULED:
            # if scheduled then change the state to published
            # change the `version` and `versioncreated` for the item
            # in archive collection and published collection.
            versioncreated = utcnow()
            item_updates = {'versioncreated': versioncreated, ITEM_STATE: CONTENT_STATE.PUBLISHED}
            resolve_document_version(document=item_updates, resource=ARCHIVE,
                                     method='PATCH',
                                     latest_doc={config.VERSION: published_item[config.VERSION]})

            # update the archive collection
            archive_item = archive_service.find_one(req=None, _id=published_item['item_id'])
            archive_service.system_update(published_item['item_id'], item_updates, archive_item)
            # insert into version.
            insert_into_versions(published_item['item_id'], doc=None)
            # import to legal archive
            import_into_legal_archive.apply_async(countdown=3, kwargs={'item_id': published_item['item_id']})
            logger.info('Modified the version of scheduled item: {}'.format(published_item_id))

            logger.info('Publishing scheduled item_id: {}'.format(published_item_id))
            # update the published collection
            published_update.update(item_updates)
            published_item.update({'versioncreated': versioncreated,
                                   ITEM_STATE: CONTENT_STATE.PUBLISHED,
                                   config.VERSION: item_updates[config.VERSION]})

        published_service.patch(published_item_id, published_update)
        queued = get_enqueue_service(published_item[ITEM_OPERATION]).enqueue_item(published_item)
        # if the item is queued in the publish_queue then the state is "queued"
        # else the queue state is "queued_not_transmitted"
        queue_state = PUBLISH_STATE.QUEUED if queued else PUBLISH_STATE.QUEUED_NOT_TRANSMITTED
        published_service.patch(published_item_id, {QUEUE_STATE: queue_state})
        logger.info('Queued item with id: {} and item_id: {}'.format(published_item_id, published_item['item_id']))
    except KeyError:
        published_service.patch(published_item_id, {QUEUE_STATE: PUBLISH_STATE.PENDING})
        logger.exception('No enqueue service found for operation %s', published_item[ITEM_OPERATION])
    except Exception:
        published_service.patch(published_item_id, {QUEUE_STATE: PUBLISH_STATE.PENDING})
        raise
    def test_import_into_legal_archive(self):
        archive_publish = get_resource_service('archive_publish')
        archive_correct = get_resource_service('archive_correct')
        legal_archive = get_resource_service('legal_archive')
        archive = get_resource_service('archive_publish')
        published = get_resource_service('published')
        publish_queue = get_resource_service('publish_queue')

        self.original_method = LegalArchiveImport.upsert_into_legal_archive
        LegalArchiveImport.upsert_into_legal_archive = MagicMock()

        for item in self.archive_items:
            archive_publish.patch(item['_id'], {'headline': 'publishing', 'abstract': 'publishing'})

        for item in self.archive_items:
            legal_item = legal_archive.find_one(req=None, _id=item['_id'])
            self.assertIsNone(legal_item, 'Item: {} is not none.'.format(item['_id']))

        archive_correct.patch(self.archive_items[1]['_id'], {'headline': 'correcting', 'abstract': 'correcting'})

        LegalArchiveImport.upsert_into_legal_archive = self.original_method
        self.class_under_test().run(1)

        # items are not expired
        for item in self.archive_items:
            legal_item = legal_archive.find_one(req=None, _id=item['_id'])
            self.assertIsNone(legal_item, 'Item: {} is not none.'.format(item['_id']))

        # expire the items
        for item in self.archive_items:
            original = archive.find_one(req=None, _id=item['_id'])
            archive.system_update(item['_id'], {'expiry': utcnow() - timedelta(minutes=30)}, original)
            published.update_published_items(item['_id'], 'expiry', utcnow() - timedelta(minutes=30))

        # run the command after expiry
        self.class_under_test().run(1)

        # items are expired
        for item in self.archive_items:
            legal_item = legal_archive.find_one(req=None, _id=item['_id'])
            self.assertEqual(legal_item['_id'], item['_id'], 'item {} not imported to legal'.format(item['_id']))

        # items are moved to legal
        for item in self.archive_items:
            published_items = list(published.get_other_published_items(item['_id']))
            for published_item in published_items:
                self.assertEqual(published_item['moved_to_legal'], True)

        # items are moved to legal publish queue
        for item in self.archive_items:
            req = ParsedRequest()
            req.where = json.dumps({'item_id': item['_id']})
            queue_items = list(publish_queue.get(req=req, lookup=None))
            self.assertGreaterEqual(len(queue_items), 1)
            for queue_item in queue_items:
                self.assertEqual(queue_item['moved_to_legal'], True)
 def setUp(self):
     init_app(self.app)
     self.incident[0]['start_date'] = utcnow() - timedelta(hours=10)
     self.incident[0]['end_date'] = utcnow() + timedelta(hours=100)
     self.incident[1]['start_date'] = utcnow() - timedelta(hours=10)
     self.incident[1]['end_date'] = utcnow() + timedelta(hours=100)
     self.app.data.insert('traffic_incidents', self.incident)
     self.app.data.insert('archive', [{'_id': 1}])
     self.app.config['INIT_DATA_PATH'] = os.path.abspath(
         os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../data'))
 def test_utcnow(self):
     self.assertIsInstance(utcnow(), datetime)
     date1 = get_date(datetime.now(tz=utc))
     date2 = utcnow()
     self.assertEqual(date1.year, date2.year)
     self.assertEqual(date1.month, date2.month)
     self.assertEqual(date1.day, date2.day)
     self.assertEqual(date1.hour, date2.hour)
     self.assertEqual(date1.minute, date2.minute)
     self.assertEqual(date1.second, date2.second)
 def on_create(self, docs):
     # the same content can be published more than once
     # so it is necessary to have a new _id and preserve the original
     for doc in docs:
         doc['item_id'] = doc['_id']
         doc['_created'] = utcnow()
         doc['versioncreated'] = utcnow()
         doc.pop('_id', None)
         doc.pop('lock_user', None)
         doc.pop('lock_time', None)
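A short sketch of the id handling in on_create above; the service instance and doc are hypothetical:

doc = {'_id': 'abc', 'lock_user': 'u1', 'lock_time': utcnow()}
service.on_create([doc])  # hypothetical service instance
# the original id survives as item_id while _id and the lock fields are dropped
assert doc['item_id'] == 'abc'
assert '_id' not in doc and 'lock_user' not in doc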
 def _set_provider_status(self, doc, message=''):
     user = getattr(g, 'user', None)
     if doc.get('is_closed', True):
         doc['last_closed'] = doc.get('last_closed', {})
         doc['last_closed']['closed_at'] = utcnow()
         doc['last_closed']['closed_by'] = user['_id'] if user else None
         doc['last_closed']['message'] = message
     else:
         doc['last_opened'] = doc.get('last_opened', {})
         doc['last_opened']['opened_at'] = utcnow()
         doc['last_opened']['opened_by'] = user['_id'] if user else None
 def test_get_utc_schedule(self):
     embargo_date = utcnow() + timedelta(minutes=10)
     content = {'embargo': embargo_date}
     utc_schedule = get_utc_schedule(content, 'embargo')
     self.assertEqual(utc_schedule, embargo_date)
def get_date():
    return utcnow()
def date_header(datetime):
    return format_datetime(parse_date(datetime if datetime else utcnow()),
                           'EEEE, MMMM d, yyyy')
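A hedged usage sketch for date_header, assuming parse_date accepts an ISO 8601 string and format_datetime uses babel-style patterns:

# e.g. date_header('2016-01-01T00:00:00+0000') -> 'Friday, January 1, 2016'
# a falsy argument falls back to utcnow()
header = date_header(None)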
def generate_text_item(items, template_name, resource_type):
    template = get_resource_service('planning_export_templates').get_export_template(template_name, resource_type)
    archive_service = get_resource_service('archive')
    if not template:
        raise SuperdeskApiError.badRequestError('Invalid template selected')

    for item in items:
        # Build the list of assignees, preferring coverage_provider over the assigned user
        item['published_archive_items'] = []
        item['assignees'] = []
        item['text_assignees'] = []
        item['contacts'] = []
        text_users = []
        text_desks = []
        users = []
        desks = []

        def enhance_coverage(planning, item, users):
            for c in (planning.get('coverages') or []):
                is_text = c.get('planning', {}).get('g2_content_type', '') == 'text'
                completed = (c.get('assigned_to') or {}).get('state') == ASSIGNMENT_WORKFLOW_STATE.COMPLETED
                assigned_to = c.get('assigned_to') or {}
                user = None
                desk = None
                if assigned_to.get('coverage_provider'):
                    item['assignees'].append(assigned_to['coverage_provider']['name'])
                    if is_text and not completed:
                        item['text_assignees'].append(assigned_to['coverage_provider']['name'])
                elif assigned_to.get('user'):
                    user = assigned_to['user']
                    users.append(user)
                elif assigned_to.get('desk'):
                    desk = assigned_to.get('desk')
                    desks.append(desk)

                # Get abstract from related text item if coverage is 'complete'
                if is_text:
                    if completed:
                        results = list(archive_service.get_from_mongo(req=None,
                                                                      lookup={
                                                                          'assignment_id': ObjectId(
                                                                              c['assigned_to']['assignment_id']),
                                                                          'state': {'$in': ['published', 'corrected']},
                                                                          'pubstatus': 'usable',
                                                                          'rewrite_of': None
                                                                      }))
                        if len(results) > 0:
                            item['published_archive_items'].append({
                                'archive_text': get_first_paragraph_text(results[0].get('abstract')) or '',
                                'archive_slugline': results[0].get('slugline') or ''
                            })
                    elif c.get('news_coverage_status', {}).get('qcode') == 'ncostat:int':
                        if user:
                            text_users.append(user)
                        else:
                            text_desks.append(desk)

            item['contacts'] = get_contacts_from_item(item)

        if resource_type == 'planning':
            enhance_coverage(item, item, users)
        else:
            for p in (item.get('plannings') or []):
                enhance_coverage(p, item, users)

        users = get_resource_service('users').find(where={
            '_id': {'$in': users}
        })

        desks = get_resource_service('desks').find(where={
            '_id': {'$in': desks}
        })

        for u in users:
            name = "{0} {1}".format(u.get('last_name'), u.get('first_name'))
            item['assignees'].append(name)
            if str(u['_id']) in text_users:
                item['text_assignees'].append(name)

        for d in desks:
            item['assignees'].append(d['name'])
            if str(d['_id']) in text_desks:
                item['text_assignees'].append(d['name'])

        set_item_place(item)

        item['description_text'] = item.get('description_text') or (item.get('event') or {}).get('definition_short')
        item['slugline'] = item.get('slugline') or (item.get('event') or {}).get('name')

        # Handle dates and remote time-zones
        if item.get('dates') or (item.get('event') or {}).get('dates'):
            dates = item.get('dates') or item.get('event').get('dates')
            item['schedule'] = utc_to_local(config.DEFAULT_TIMEZONE, dates.get('start'))
            if get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow()) !=\
                    get_timezone_offset(dates.get('tz'), utcnow()):
                item['schedule'] = "{} ({})".format(item['schedule'].strftime('%H%M'), item['schedule'].tzname())
            else:
                item['schedule'] = item['schedule'].strftime('%H%M')

    agendas = []
    if resource_type == 'planning':
        agendas = group_items_by_agenda(items)
        inject_internal_converages(items)

        labels = {}
        cv = get_resource_service('vocabularies').find_one(req=None, _id='g2_content_type')
        if cv:
            labels = {_type['qcode']: _type['name'] for _type in cv['items']}

        for item in items:
            item['coverages'] = [labels.get(coverage.get('planning').get('g2_content_type'),
                                            coverage.get('planning').get('g2_content_type')) +
                                 (' (cancelled)' if coverage.get('workflow_status', '') == 'cancelled' else '')
                                 for coverage in item.get('coverages', [])
                                 if (coverage.get('planning') or {}).get('g2_content_type')]

    article = {}

    for key, value in template.items():
        if value.endswith(".html"):
            article[key.replace('_template', '')] = render_template(value, items=items, agendas=agendas)
        else:
            article[key] = render_template_string(value, items=items, agendas=agendas)

    return article
def set_item_expiry(doc):
    expiry_minutes = app.settings.get('PLANNING_EXPIRY_MINUTES', None)
    if expiry_minutes is not None:
        doc[ITEM_EXPIRY] = utcnow() + timedelta(minutes=expiry_minutes)
    else:
        doc[ITEM_EXPIRY] = None
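A brief sketch of the two configuration branches in set_item_expiry, assuming a Flask-style app config and the module's ITEM_EXPIRY constant:

doc = {}
# with app.settings['PLANNING_EXPIRY_MINUTES'] = 60 the doc gets a concrete expiry
set_item_expiry(doc)
# doc[ITEM_EXPIRY] is utcnow() + timedelta(minutes=60), or None when unset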
    def setUp(self):
        try:
            from apps.legal_archive.commands import ImportLegalArchiveCommand
        except ImportError:
            self.fail("Could not import class under test (ImportLegalArchiveCommand).")
        else:
            self.class_under_test = ImportLegalArchiveCommand
            self.app.data.insert("desks", self.desks)
            self.app.data.insert("users", self.users)
            self.validators = [
                {"schema": {}, "type": "text", "act": "publish", "_id": "publish_text"},
                {"schema": {}, "type": "text", "act": "correct", "_id": "correct_text"},
                {"schema": {}, "type": "text", "act": "kill", "_id": "kill_text"},
            ]

            self.products = [
                {"_id": "1", "name": "prod1"},
                {"_id": "2", "name": "prod2", "codes": "abc,def"},
                {"_id": "3", "name": "prod3", "codes": "xyz"},
            ]

            self.subscribers = [
                {
                    "name": "Test",
                    "is_active": True,
                    "subscriber_type": "wire",
                    "email": "*****@*****.**",
                    "sequence_num_settings": {"max": 9999, "min": 1},
                    "products": ["1"],
                    "destinations": [
                        {
                            "name": "test",
                            "delivery_type": "email",
                            "format": "nitf",
                            "config": {"recipients": "*****@*****.**"},
                        }
                    ],
                }
            ]
            self.app.data.insert("validators", self.validators)
            self.app.data.insert("products", self.products)
            self.app.data.insert("subscribers", self.subscribers)
            self.class_under_test = ImportLegalArchiveCommand
            self.archive_items = [
                {
                    "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
                    "_id": "item1",
                    "state": "in_progress",
                    "headline": "item 1",
                    "type": "text",
                    "slugline": "item 1 slugline",
                    "_current_version": 1,
                    "_created": utcnow() - timedelta(minutes=3),
                    "expired": utcnow() - timedelta(minutes=30),
                },
                {
                    "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
                    "_id": "item2",
                    "state": "in_progress",
                    "headline": "item 2",
                    "type": "text",
                    "slugline": "item 2 slugline",
                    "_current_version": 1,
                    "_created": utcnow() - timedelta(minutes=2),
                    "expired": utcnow() - timedelta(minutes=30),
                },
                {
                    "task": {"desk": self.desks[0]["_id"], "stage": self.desks[0]["incoming_stage"], "user": "******"},
                    "_id": "item3",
                    "state": "in_progress",
                    "headline": "item 2",
                    "type": "text",
                    "slugline": "item 2 slugline",
                    "_current_version": 1,
                    "_created": utcnow() - timedelta(minutes=1),
                    "expired": utcnow() - timedelta(minutes=30),
                },
            ]

            get_resource_service(ARCHIVE).post(self.archive_items)
            for item in self.archive_items:
                resolve_document_version(item, ARCHIVE, "POST")
                insert_into_versions(id_=item["_id"])
def brief_internal_routing(item: dict, **kwargs):
    guid = item.get('guid', 'unknown')
    logger.info('macro started item=%s', guid)

    try:
        assert str(item['profile']) == str(
            _get_profile_id(TEXT_PROFILE)), 'profile is not text'
        assert get_word_count(item['body_html']) < 301, 'body is too long'
        # The title should not start with the word "CORRECTION"
        if item.get('headline'):
            title_start_with_correction = item['headline'].lstrip().startswith(
                'CORRECTION')
            assert not title_start_with_correction, 'The headline/title should not start with word CORRECTION'
    except AssertionError as err:
        logger.info('macro stop on assert item=%s error=%s', guid, err)
        raise StopDuplication()
    except KeyError as err:
        logger.error(err)
        raise StopDuplication()

    item.setdefault('subject', [])
    item['urgency'] = 2
    item['profile'] = _get_profile_id(BRIEF_PROFILE)
    item['subject'] = _get_product_subject(
        _get_brief_subject(item.get('subject', [])))
    item['status'] = CONTENT_STATE.SCHEDULED
    item['operation'] = 'publish'

    _fix_headline(item)
    _fix_body_html(item)

    UTC_FIELD = 'utc_{}'.format(PUBLISH_SCHEDULE)
    try:
        published_at = item[SCHEDULE_SETTINGS][UTC_FIELD]
    except KeyError:
        published_at = utcnow()
    item[SCHEDULE_SETTINGS] = {
        'time_zone': 'Europe/Brussels',
    }

    # Set item publish schedule to 7:30 am for autopublish between 4 and 7 am
    is_press_headline = item.get(
        'headline') and 'press' in item['headline'].lower()
    current_datetime = utc_to_local(superdesk.app.config['DEFAULT_TIMEZONE'],
                                    utcnow())
    if is_press_headline and time(4, 00) <= current_datetime.time() <= time(
            7, 00):
        item[PUBLISH_SCHEDULE] = current_datetime.replace(hour=7,
                                                          minute=30,
                                                          second=00)
        logger.info(
            'Set publish schedule to 7:30 am for autopublish between 4 and 7 am item=%s',
            item.get('guid', 'unknown'))
    else:
        # schedule +30m
        item[PUBLISH_SCHEDULE] = utc_to_local(
            item[SCHEDULE_SETTINGS]['time_zone'],
            published_at + timedelta(minutes=30))

    update_schedule_settings(item, PUBLISH_SCHEDULE, item[PUBLISH_SCHEDULE])
    item[PUBLISH_SCHEDULE] = item[PUBLISH_SCHEDULE].replace(tzinfo=None)

    # remove text in () brackets along with brackets
    if item.get("headline"):
        title = re.sub(r"\([^()]*\)", "", item['headline'])
        item['headline'] = " ".join(title.split())

    # publish
    try:
        internal_destination_auto_publish(item)
    except StopDuplication:
        logger.info('macro done item=%s', guid)
    except DocumentError as err:
        logger.error('validation error when creating brief item=%s error=%s',
                     guid, err)
    except Exception as err:
        logger.exception(err)

    # avoid another item being created
    raise StopDuplication()
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    """
    lock_name = get_lock_id('ingest', provider['name'],
                            provider[superdesk.config.ID_FIELD])

    if not lock(lock_name, expire=UPDATE_TTL + 10):
        if sync:
            logger.error('update is already running for %s', provider['name'])
        return

    try:
        feeding_service = get_feeding_service(provider['feeding_service'])
        update = {LAST_UPDATED: utcnow()}

        if sync:
            provider[LAST_UPDATED] = utcnow() - timedelta(
                days=9999)  # import everything again

        generator = feeding_service.update(provider, update)
        if isinstance(generator, list):
            generator = (items for items in generator)
        failed = None
        while True:
            try:
                items = generator.send(failed)
                failed = ingest_items(items, provider, feeding_service,
                                      rule_set, routing_scheme)
                update_last_item_updated(update, items)
            except StopIteration:
                break

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service(
            'ingest_providers')
        provider = ingest_provider_service.find_one(
            req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(
            provider[superdesk.config.ID_FIELD], update, provider)

        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service(
                'users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers',
                user_list=admins,
                name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(
                    tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(
            provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update',
                              provider_id=str(
                                  provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
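# The lock()/unlock() guard above is the usual "skip this run if an update is
# already in progress" pattern (in Superdesk it is presumably backed by the
# cache/Redis rather than an in-process lock). A self-contained sketch:
import threading

_update_locks = {}

def run_exclusive(lock_name, fn):
    """Run fn() only if no other caller currently holds lock_name."""
    lock = _update_locks.setdefault(lock_name, threading.Lock())
    if not lock.acquire(blocking=False):
        return False  # another run is in progress; skip silently, as above
    try:
        fn()
        return True
    finally:
        lock.release()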
Example #39
    def lock(self, item_filter, user_id, session_id, action):
        item_model = get_model(ItemModel)
        item = item_model.find_one(item_filter)

        if not item:
            raise SuperdeskApiError.notFoundError()

        # the lock id is scoped per item
        lock_id = "item_lock {}".format(item.get(config.ID_FIELD))

        # if the lock cannot be acquired, raise a forbidden exception
        if not lock(lock_id, expire=5):
            raise SuperdeskApiError.forbiddenError(message="Item is locked by another user.")

        try:
            can_user_lock, error_message = self.can_lock(item, user_id, session_id)

            if can_user_lock:
                self.app.on_item_lock(item, user_id)
                updates = {LOCK_USER: user_id, LOCK_SESSION: session_id, 'lock_time': utcnow()}
                if action:
                    updates['lock_action'] = action

                item_model.update(item_filter, updates)

                if item.get(TASK):
                    item[TASK]['user'] = user_id
                else:
                    item[TASK] = {'user': user_id}

                superdesk.get_resource_service('tasks').assign_user(item[config.ID_FIELD], item[TASK])
                self.app.on_item_locked(item, user_id)
                push_notification('item:lock',
                                  item=str(item.get(config.ID_FIELD)),
                                  item_version=str(item.get(config.VERSION)),
                                  user=str(user_id), lock_time=updates['lock_time'],
                                  lock_session=str(session_id))
            else:
                raise SuperdeskApiError.forbiddenError(message=error_message)

            item = item_model.find_one(item_filter)
            return item
        finally:
            # unlock the lock :)
            unlock(lock_id, remove=True)
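    # can_lock() above returns a (bool, error_message) tuple. A hedged sketch of
    # that contract, with the real session/ownership rules reduced to a single
    # LOCK_USER check (names as used in the method above):
    #
    #   def can_lock(self, item, user_id, session_id):
    #       if item.get(LOCK_USER) and str(item[LOCK_USER]) != str(user_id):
    #           return False, 'Item is locked by another user.'
    #       return True, ''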
Example #40
class NewsMLG2Formatter(Formatter):
    """NewsML G2 Formatter"""

    XML_ROOT = '<?xml version="1.0" encoding="UTF-8"?>'
    now = utcnow()
    string_now = now.strftime('%Y-%m-%dT%H:%M:%S.0000Z')

    _message_nsmap = {None: 'http://iptc.org/std/nar/2006-10-01/', 'x': 'http://www.w3.org/1999/xhtml',
                      'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}

    _debug_message_extra = {'{{{}}}schemaLocation'.format(_message_nsmap['xsi']): 'http://iptc.org/std/nar/2006-10-01/ \
    http://www.iptc.org/std/NewsML-G2/2.18/specification/NewsML-G2_2.18-spec-All-Power.xsd'}

    def format(self, article, subscriber, codes=None):
        """Create article in NewsML G2 format

        :param dict article:
        :param dict subscriber:
        :param list codes: selector codes
        :return [(int, str)]: return a list of tuples. Each tuple consists of a
            publish sequence number and the formatted article string.
        :raises FormatterError: if the formatter fails to format an article
        """
        try:
            pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
            is_package = self._is_package(article)
            news_message = etree.Element('newsMessage', attrib=self._debug_message_extra, nsmap=self._message_nsmap)
            self._format_header(article, news_message, pub_seq_num)
            item_set = self._format_item(news_message)
            if is_package:
                item = self._format_item_set(article, item_set, 'packageItem')
                self._format_groupset(article, item)
            elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
                item = self._format_item_set(article, item_set, 'newsItem')
                self._format_contentset(article, item)
            else:
                nitf_formatter = NITFFormatter()
                nitf = nitf_formatter.get_nitf(article, subscriber, pub_seq_num)
                newsItem = self._format_item_set(article, item_set, 'newsItem')
                self._format_content(article, newsItem, nitf)

            return [(pub_seq_num, self.XML_ROOT + etree.tostring(news_message).decode('utf-8'))]
        except Exception as ex:
            raise FormatterError.newmsmlG2FormatterError(ex, subscriber)

    def _is_package(self, article):
        """Given an article, return whether it is a non-takes package.

        :param article:
        :return: True if the article is a package
        """
        return article[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and article.get(PACKAGE_TYPE, '') == ''

    def _format_header(self, article, news_message, pub_seq_num):
        """Creates the header element of the newsMessage.

        :param dict article:
        :param Element news_message:
        :param int pub_seq_num:
        """
        header = SubElement(news_message, 'header')
        SubElement(header, 'sent').text = self.string_now
        SubElement(header, 'sender').text = get_newsml_provider_id()
        SubElement(header, 'transmitId').text = str(pub_seq_num)
        SubElement(header, 'priority').text = str(article.get('priority', 5))
        SubElement(header, 'origin').text = article.get('original_source', article.get('source', ''))

    def _format_item(self, news_message):
        return SubElement(news_message, 'itemSet')

    def _format_item_set(self, article, item_set, item_type):
        """Construct the item element (newsItem or packageItem) and append the item_meta and contentMeta entities

        :param dict article:
        :param element item_set:
        :param str item_type:
        """
        item = SubElement(item_set, item_type, attrib={'standard': 'NewsML-G2', 'standardversion': '2.18',
                                                       'guid': article['guid'],
                                                       'version': str(article[superdesk.config.VERSION]),
                                                       XML_LANG: article.get('language', 'en'),
                                                       'conformance': 'power'})
        SubElement(item, 'catalogRef',
                   attrib={'href': 'http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_25.xml'})
        self._format_rights(item, article)
        item_meta = SubElement(item, 'itemMeta')
        self._format_itemClass(article, item_meta)
        self._format_provider(item_meta)
        self._format_versioncreated(article, item_meta)
        self._format_firstcreated(article, item_meta)
        self._format_pubstatus(article, item_meta)

        if article.get(EMBARGO):
            SubElement(item_meta, 'embargoed').text = \
                get_utc_schedule(article, EMBARGO).isoformat()

        # optional properties
        self._format_ednote(article, item_meta)
        self._format_signal(article, item_meta)

        content_meta = SubElement(item, 'contentMeta')
        SubElement(content_meta, 'urgency').text = str(article.get('urgency', 5))
        self._format_timestamps(article, content_meta)
        self._format_creator(article, content_meta)
        self._format_located(article, content_meta)
        self._format_subject(article, content_meta)
        self._format_genre(article, content_meta)
        self._format_slugline(article, content_meta)
        self._format_headline(article, content_meta)
        self._format_place(article, content_meta)
        self._format_category(article, content_meta)
        self._format_company_codes(article, content_meta, item)

        if article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            self._format_description(article, content_meta)
            self._format_creditline(article, content_meta)
        return item

    def _format_content(self, article, news_item, nitf):
        """Adds the content set to the xml

        :param dict article:
        :param Element newsItem:
        :param Element nitf:
        """
        content_set = SubElement(news_item, 'contentSet')
        if article.get(FORMAT) == FORMATS.PRESERVED:
            inline_data = get_text(self.append_body_footer(article))
            SubElement(content_set, 'inlineData',
                       attrib={'contenttype': 'text/plain'}).text = inline_data
        elif article[ITEM_TYPE] in [CONTENT_TYPE.TEXT, CONTENT_TYPE.COMPOSITE]:
            inline = SubElement(content_set, 'inlineXML',
                                attrib={'contenttype': 'application/nitf+xml'})
            inline.append(nitf)

    def _format_rights(self, newsItem, article):
        """Adds the rightsholder section to the newsItem

        :param Element newsItem:
        :param dict article:
        """
        rights = superdesk.get_resource_service('vocabularies').get_rightsinfo(article)
        rightsinfo = SubElement(newsItem, 'rightsInfo')
        holder = SubElement(rightsinfo, 'copyrightHolder')
        SubElement(holder, 'name').text = rights['copyrightholder']
        SubElement(rightsinfo, 'copyrightNotice').text = rights['copyrightnotice']
        SubElement(rightsinfo, 'usageTerms').text = rights['usageterms']

    # itemClass elements
    def _format_itemClass(self, article, item_meta):
        """Append the item class to the item_meta data element

        :param dict article:
        :param Element item_meta:
        """
        if self._is_package(article):
            SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:composite'})
            return
        if article[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED, CONTENT_TYPE.COMPOSITE}:
            SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:text'})
        elif article[ITEM_TYPE] in {CONTENT_TYPE.PICTURE, CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO}:
            SubElement(item_meta, 'itemClass', attrib={'qcode': 'ninat:%s' % article[ITEM_TYPE].lower()})

    def _format_provider(self, item_meta):
        """Appends the provider element to the item_meta element

        :param Element item_meta:
        """
        provider = SubElement(item_meta, 'provider')
        SubElement(provider, 'name').text = get_newsml_provider_id()

    def _format_versioncreated(self, article, item_meta):
        """Appends the versionCreated element to the item_meta element.

        :param dict article:
        :param Element item_meta:
        """
        SubElement(item_meta, 'versionCreated').text = article['versioncreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00')

    def _format_firstcreated(self, article, item_meta):
        """Appends the firstCreated element to the item_meta element.

        :param dict article:
        :param Element item_meta:
        """
        SubElement(item_meta, 'firstCreated').text = article['firstcreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00')
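    # The fixed '+00:00' strftime pattern used by the two helpers above renders a
    # naive UTC datetime as an ISO-8601 string with an explicit zero offset, e.g.:
    #
    #   datetime(2015, 6, 13, 11, 45, 19).strftime('%Y-%m-%dT%H:%M:%S+00:00')
    #   -> '2015-06-13T11:45:19+00:00'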

    def _format_pubstatus(self, article, item_meta):
        """Appends the pubStatus element to the item_meta element.

        :param dict article:
        :param Element item_meta:
        """
        SubElement(item_meta, 'pubStatus', attrib={'qcode': 'stat:' + article.get('pubstatus', 'usable')})

    def _format_signal(self, article, item_meta):
        """Appends the signal element to the item_meta element.

        :param dict article:
        :param Element item_meta:
        """
        if article['state'] == 'corrected':
            SubElement(item_meta, 'signal', attrib={'qcode': 'sig:correction'})
        else:
            SubElement(item_meta, 'signal', attrib={'qcode': 'sig:update'})

    def _format_ednote(self, article, item_meta):
        """Appends the edNote element to the item_meta element.

        :param dict article:
        :param Element item_meta:
        """
        if article.get('ednote'):
            SubElement(item_meta, 'edNote').text = article['ednote']

    # contentMeta elements
    def _format_timestamps(self, article, content_meta):
        """Appends the contentCreated and contentModified element to the contentMeta element.

        :param dict article:
        :param Element content_meta:
        """
        SubElement(content_meta, 'contentCreated').text = article['firstcreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00')
        SubElement(content_meta, 'contentModified').text = article['versioncreated'].strftime('%Y-%m-%dT%H:%M:%S+00:00')

    def _format_creator(self, article, content_meta):
        """Appends the creator element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        if 'byline' in article:
            creator = SubElement(content_meta, 'creator')
            SubElement(creator, 'name').text = article.get('byline', '') or ''

    def _format_subject(self, article, content_meta):
        """Appends the subject element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        if article.get('subject'):
            for s in article['subject']:
                if 'qcode' in s:
                    subj = SubElement(content_meta, 'subject',
                                      attrib={'type': 'cpnat:abstract', 'qcode': 'subj:' + s['qcode']})
                    SubElement(subj, 'name', attrib={XML_LANG: 'en'}).text = s['name']

    def _format_genre(self, article, content_meta):
        """Appends the genre element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        if article.get('genre'):
            for g in article['genre']:
                genre = SubElement(content_meta, 'genre')
                SubElement(genre, 'name', attrib={XML_LANG: 'en'}).text = g.get('name', '')

    def _format_category(self, article, content_meta):
        """Appends the subject element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        for category in article.get('anpa_category', []):
            subject = SubElement(content_meta, 'subject',
                                 attrib={'type': 'cpnat:abstract', 'qcode': 'cat:' + category['qcode']})
            SubElement(subject, 'name', attrib={XML_LANG: 'en'}).text = category.get('name', '')

    def _format_slugline(self, article, content_meta):
        """Appends the slugline element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        SubElement(content_meta, 'slugline').text = article.get('slugline', '')

    def _format_headline(self, article, content_meta):
        """Appends the headline element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        SubElement(content_meta, 'headline').text = article.get('headline', '')

    def _format_place(self, article, content_meta):
        """Appends the subject (of type geoArea) element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        if not article.get('place'):
            return

        for place in article.get('place', []):
            if place.get('state'):
                subject = self._create_subject_element(content_meta, place.get('state'), 'loctyp:CountryArea')
                self._create_broader_element(subject, place.get('country'), 'loctyp:Country')
                self._create_broader_element(subject, place.get('world_region'), 'loctyp:WorldArea')
            elif place.get('country'):
                subject = self._create_subject_element(content_meta, place.get('country'), 'loctyp:Country')
                self._create_broader_element(subject, place.get('world_region'), 'loctyp:WorldArea')
            elif place.get('world_region'):
                self._create_subject_element(content_meta, place.get('world_region'), 'loctyp:WorldArea')

    def _create_broader_element(self, parent, broader_name, qcode, concept_type='cpnat:abstract'):
        """Create broader element.

        :param element parent: parent element under which the broader element is created
        :param str broader_name: value for the name element
        :param str qcode:
        :param str concept_type:
        """
        if broader_name:
            broader_elm = SubElement(parent, 'broader',
                                     attrib={'type': concept_type, 'qcode': qcode})
            SubElement(broader_elm, 'name').text = broader_name

    def _create_subject_element(self, parent, subject_name, qcode, concept_type='cpnat:abstract'):
        """Create a subject element

        :param element parent:
        :param str subject_name: value for the name element
        :param str qcode:
        :param str concept_type:
        :return: returns the subject element.
        """
        subject_elm = SubElement(parent, 'subject',
                                 attrib={'type': concept_type, 'qcode': qcode})
        SubElement(subject_elm, 'name').text = subject_name
        return subject_elm

    def _format_located(self, article, content_meta):
        """Appends the located element to the contentMeta element

        :param dict article:
        :param Element content_meta:
        """
        located = article.get('dateline', {}).get('located', {})
        if located and located.get('city'):
            located_elm = SubElement(content_meta, 'located',
                                     attrib={'type': 'cpnat:abstract', 'qcode': 'loctyp:City'})
            SubElement(located_elm, "name").text = located.get('city')
            self._create_broader_element(located_elm, located.get('state'), 'loctyp:CountryArea')
            self._create_broader_element(located_elm, located.get('country'), 'loctyp:Country')

        if article.get('dateline', {}).get('text'):
            SubElement(content_meta, 'dateline').text = article['dateline']['text']

    def _format_description(self, article, content_meta):
        """Appends the image description to the contentMeta element

        :param article:
        :param content_meta:
        """
        SubElement(content_meta, 'description', attrib={'role': 'drol:caption'}).text = article.get('description', '')

    def _format_creditline(self, article, content_meta):
        """Append the creditLine to the contentMeta for a picture

        :param article:
        :param content_meta:
        """
        SubElement(content_meta, 'creditline').text = article.get('original_source', article.get('source', ''))

    def _format_groupset(self, article, item):
        """Constructs the groupSet element of a packageItem

        :param article:
        :param item:
        :return: groupSet appended to the item
        """
        groupSet = SubElement(item, 'groupSet', attrib={'root': 'root'})
        for group in article.get(GROUPS, []):
            group_Elem = SubElement(groupSet, 'group', attrib={'id': group.get(GROUP_ID),
                                                               'role': group.get(ROLE)})
            for ref in group.get(REFS, []):
                if ID_REF in ref:
                    SubElement(group_Elem, 'groupRef', attrib={'idref': ref.get(ID_REF)})
                else:
                    if RESIDREF in ref:
                        # get the current archive item being referred to
                        archive_item = superdesk.get_resource_service(ARCHIVE).find_one(req=None,
                                                                                        _id=ref.get(RESIDREF))
                        if archive_item:
                            itemRef = SubElement(group_Elem, 'itemRef',
                                                 attrib={'residref': ref.get(RESIDREF),
                                                         'contenttype': 'application/vnd.iptc.g2.newsitem+xml'})
                            SubElement(itemRef, 'itemClass', attrib={'qcode': 'ninat:' + ref.get(ITEM_TYPE, 'text')})
                            self._format_pubstatus(archive_item, itemRef)
                            self._format_headline(archive_item, itemRef)
                            self._format_slugline(archive_item, itemRef)

    def _format_contentset(self, article, item):
        """Constructs the contentSet element in a picture, video and audio newsItem.

        :param article:
        :param item:
        :return: contentSet Element added to the item
        """
        content_set = SubElement(item, 'contentSet')
        for rendition, value in article.get('renditions', {}).items():
            attrib = {'href': value.get('href'),
                      'contenttype': value.get('mimetype', ''),
                      'rendition': 'rendition:' + rendition
                      }
            if article.get(ITEM_TYPE) == CONTENT_TYPE.PICTURE:
                if 'height' in value:
                    attrib['height'] = str(value.get('height'))
                if 'width' in value:
                    attrib['width'] = str(value.get('width'))
            elif article.get(ITEM_TYPE) in {CONTENT_TYPE.VIDEO, CONTENT_TYPE.AUDIO}:
                if get_filemeta(article, 'width'):
                    attrib['width'] = str(get_filemeta(article, 'width'))
                if get_filemeta(article, 'height'):
                    attrib['height'] = str(get_filemeta(article, 'height'))
                if get_filemeta(article, 'duration'):
                    attrib['duration'] = get_filemeta(article, 'duration')
                    attrib['durationunit'] = 'timeunit:normalPlayTime'

            if rendition == 'original' and get_filemeta(article, 'length'):
                attrib['size'] = str(get_filemeta(article, 'length'))
            SubElement(content_set, 'remoteContent', attrib=attrib)
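    # Illustrative input/output for _format_contentset() above (values made up):
    #
    #   renditions = {'original': {'href': 'http://example.com/raw',
    #                              'mimetype': 'image/jpeg',
    #                              'height': 2475, 'width': 3500}}
    #
    # produces, roughly:
    #
    #   <contentSet>
    #     <remoteContent href="http://example.com/raw" contenttype="image/jpeg"
    #                    rendition="rendition:original" height="2475" width="3500"/>
    #   </contentSet>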

    def _format_company_codes(self, article, content_meta, item):
        """Format copy codes.

        For each company in the article, appends the subject element to the contentMeta element
        and assert element to item

        :param article: object having published article details
        :type article: dict
        :param content_meta: object representing <contentMeta> in the XML tree
        :type content_meta: lxml.etree.Element
        :param item: object representing <newsItem> in the XML tree
        :type item: lxml.etree.Element
        """

        for company in article.get('company_codes', []):
            literal_name = company['qcode']
            subject = SubElement(content_meta, 'subject',
                                 attrib={'type': 'cpnat:organisation', 'literal': literal_name})
            SubElement(subject, 'name').text = company.get('name', '')

            assert_element = SubElement(item, 'assert', attrib={'literal': literal_name})
            org_details_element = SubElement(assert_element, 'organisationDetails')
            SubElement(org_details_element, 'hasInstrument',
                       attrib={'symbol': company.get('qcode', ''), 'marketlabel': company.get('security_exchange', '')})

    def can_format(self, format_type, article):
        """Method check if the article can be formatted to NewsML G2 or not.

        :param str format_type:
        :param dict article:
        :return: True if the article can be formatted, else False
        """
        return format_type == 'newsmlg2' and \
            article[ITEM_TYPE] in {CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED, CONTENT_TYPE.COMPOSITE,
                                   CONTENT_TYPE.PICTURE, CONTENT_TYPE.VIDEO, CONTENT_TYPE.AUDIO}
Example #41
class RemoveSpikedContentTestCase(TestCase):

    articles = [{'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
                 '_id': '1',
                 'type': 'text',
                 'last_version': 3,
                 '_current_version': 4,
                 'body_html': 'Test body',
                 'urgency': 4,
                 'headline': 'Two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                            {'qcode': '04001002', 'name': 'Weather'}],
                 'state': 'draft',
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#1'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a974-xy4532fe33f9',
                 '_id': '2',
                 'last_version': 3,
                 '_current_version': 4,
                 'body_html': 'Test body of the second article',
                 'urgency': 4,
                 'headline': 'Another two students missing',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                            {'qcode': '04001002', 'name': 'Weather'}],
                 'expiry': utcnow() + timedelta(minutes=20),
                 'state': 'draft',
                 'type': 'text',
                 'unique_name': '#2'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4fa',
                 '_id': '3',
                 '_current_version': 4,
                 'body_html': 'Test body',
                 'urgency': 4,
                 'headline': 'Two students missing killed',
                 'pubstatus': 'usable',
                 'firstcreated': utcnow(),
                 'byline': 'By Alan Karben',
                 'ednote': 'Andrew Marwood contributed to this article killed',
                 'keywords': ['Student', 'Crime', 'Police', 'Missing'],
                 'subject': [{'qcode': '17004000', 'name': 'Statistics'},
                            {'qcode': '04001002', 'name': 'Weather'}],
                 'state': 'draft',
                 'expiry': utcnow() + timedelta(minutes=20),
                 'type': 'text',
                 'unique_name': '#3'},
                {'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4fc',
                 '_id': '4',
                 '_current_version': 3,
                 'state': 'draft',
                 'type': 'composite',
                 'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}], 'role': 'grpRole:NEP'},
                            {
                                'id': 'main',
                                'refs': [
                                    {
                                        'location': 'archive',
                                        'guid': '1',
                                        'residRef': '1',
                                        'type': 'text'
                                    },
                                    {
                                        'location': 'archive',
                                        'residRef': '2',
                                        'guid': '2',
                                        'type': 'text'
                                    }
                                ],
                                'role': 'grpRole:main'}],
                 'firstcreated': utcnow(),
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#4'},
                {'guid': 'tag:localhost:2015:69b961ab-4b8a-a584-2816-a7b402fed4fc',
                 '_id': '5',
                 '_current_version': 3,
                 'state': 'draft',
                 'type': 'composite',
                 'groups': [{'id': 'root', 'refs': [{'idRef': 'main'}, {'idRef': 'story'}], 'role': 'grpRole:NEP'},
                            {
                                'id': 'main',
                                'refs': [
                                    {
                                        'location': 'archive',
                                        'guid': '1',
                                        'residRef': '1',
                                        'type': 'text'
                                    }
                                ],
                                'role': 'grpRole:main'},
                            {
                                'id': 'story',
                                'refs': [
                                    {
                                        'location': 'archive',
                                        'guid': '4',
                                        'residRef': '4',
                                        'type': 'composite'
                                    }
                                ],
                                'role': 'grpRole:story'}],
                 'firstcreated': utcnow(),
                 'expiry': utcnow() + timedelta(minutes=20),
                 'unique_name': '#5'}]

    media = {
        'viewImage': {
            'media': '1592730d582080f4e9fcc2fcf43aa357bda0ed19ffe314ee3248624cd4d4bc54',
            'mimetype': 'image/jpeg',
            'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
            'height': 452,
            'width': 640
        },
        'thumbnail': {
            'media': '52250b4f37da50ee663fdbff057a5f064479f8a8bbd24fb8fdc06135d3f807bb',
            'mimetype': 'image/jpeg',
            'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
            'height': 120,
            'width': 169
        },
        'baseImage': {
            'media': '7a608aa8f51432483918027dd06d0ef385b90702bfeba84ac4aec38ed1660b18',
            'mimetype': 'image/jpeg',
            'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
            'height': 990,
            'width': 1400
        },
        'original': {
            'media': 'stub.jpeg',
            'mimetype': 'image/jpeg',
            'href': 'http://192.168.220.209/api/upload/stub.jpeg/raw?_schema=http',
            'height': 2475,
            'width': 3500
        }
    }

    def test_query_getting_expired_content(self):
        now = utcnow()

        self.app.data.insert(ARCHIVE, [
            {'expiry': get_expiry_date(0), 'state': 'spiked'},
            {'expiry': get_expiry_date(10), 'state': 'spiked'},
            {'expiry': get_expiry_date(20), 'state': 'spiked'},
            {'expiry': get_expiry_date(30), 'state': 'spiked'},
            {'expiry': None, 'state': 'spiked'},
            {'unique_id': 97, 'state': 'spiked'},
            {'expiry': now - timedelta(minutes=10), 'state': 'spiked', 'unique_id': 100},
        ])

        for expired_items in get_resource_service(ARCHIVE).get_expired_items(now):
            self.assertEqual(1, len(expired_items))
            self.assertEqual(100, expired_items[0]['unique_id'])

    def test_remove_media_files_for_picture(self):
        item = {
            '_id': 'testimage',
            'type': 'picture',
            'renditions': self.media
        }

        original = item.copy()
        with patch.object(self.app.media, 'delete') as media_delete:
            CropService().update_media_references(item, original)
            references_service = get_resource_service('media_references')
            refs = references_service.get(req=None, lookup={'item_id': 'testimage'})
            self.assertEqual(refs.count(), 4)
            for ref in refs:
                self.assertEqual(ref.get('published'), False)
            CropService().update_media_references(item, original, True)
            refs = references_service.get(req=None, lookup={'item_id': 'testimage'})
            for ref in refs:
                self.assertEqual(ref.get('published'), True)

            remove_media_files(item)
            self.assertEqual(0, media_delete.call_count)

            item = {
                '_id': 'testimage2',
                'type': 'picture',
                'renditions': self.media
            }

            original = item.copy()
            CropService().update_media_references(item, original)
            references_service = get_resource_service('media_references')
            refs = references_service.get(req=None, lookup={'item_id': 'testimage2'})
            self.assertEqual(refs.count(), 4)
            for ref in refs:
                self.assertEqual(ref.get('published'), False)

            remove_media_files(item)
            self.assertEqual(0, media_delete.call_count)

            item = {
                '_id': 'testimage3',
                'type': 'picture',
                'renditions': {
                    'viewImage': {
                        'media': '123',
                        'mimetype': 'image/jpeg',
                        'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
                        'height': 452,
                        'width': 640
                    },
                    'thumbnail': {
                        'media': '456',
                        'mimetype': 'image/jpeg',
                        'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
                        'height': 120,
                        'width': 169
                    }
                }
            }

            original = item.copy()
            CropService().update_media_references(item, original)
            references_service = get_resource_service('media_references')
            refs = references_service.get(req=None, lookup={'item_id': 'testimage3'})
            self.assertEqual(refs.count(), 2)
            for ref in refs:
                self.assertEqual(ref.get('published'), False)

            remove_media_files(item)
            self.assertEqual(2, media_delete.call_count)
            for key, rendition in item.get('renditions').items():
                media_delete.assert_any_call(rendition['media'])

    def test_remove_media_files_for_picture_associations(self):
        item = {
            '_id': 'testimage',
            'type': 'text',
            'associations': {
                'featuremedia': {
                    '_id': '123',
                    'renditions': self.media
                },
                'featurevideo': {
                    '_id': '456',
                    'renditions': {
                        'viewImage': {
                            'media': 'testing_123',
                            'mimetype': 'image/jpeg',
                            'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
                            'height': 452,
                            'width': 640
                        },
                        'thumbnail': {
                            'media': 'testing_456',
                            'mimetype': 'image/jpeg',
                            'href': 'http://192.168.220.209/api/upload/abc/raw?_schema=http',
                            'height': 120,
                            'width': 169
                        }
                    }
                }
            }
        }

        original = item.copy()
        with patch.object(self.app.media, 'delete') as media_delete:
            CropService().update_media_references(item, original)
            references_service = get_resource_service('media_references')
            refs = references_service.get(req=None, lookup={'item_id': 'testimage'})
            self.assertEqual(refs.count(), 6)
            for ref in refs:
                self.assertEqual(ref.get('published'), False)
            CropService().update_media_references(item, original, True)
            refs = references_service.get(req=None, lookup={'item_id': 'testimage'})
            for ref in refs:
                self.assertEqual(ref.get('published'), True)

            remove_media_files(item)
            self.assertEqual(0, media_delete.call_count)

    def test_delete_by_ids(self):
        ids = self.app.data.insert(ARCHIVE, self.articles)
        archive_service = get_resource_service(ARCHIVE)
        archive_service.on_delete = MagicMock()
        archive_service.delete_by_article_ids(ids)
        self.assertTrue(self.app.data.mongo.is_empty(ARCHIVE))
        self.assertTrue(self.app.data.elastic.is_empty(ARCHIVE))
        self.assertEqual(len(self.articles), archive_service.on_delete.call_count)

    def test_remove_renditions_from_all_versions(self):
        renditions = copy.copy(self.media)

        ids = self.app.data.insert(ARCHIVE, [{
            'state': 'spiked',
            'expiry': get_expiry_date(-10),
            'type': 'picture',
            'renditions': {},
        }])

        self.app.data.insert('archive_versions', [{
            '_id_document': ids[0],
            'type': 'picture',
            'renditions': renditions,
        }])

        with patch.object(self.app.media, 'delete') as media_delete:
            get_resource_service('archive').delete_by_article_ids(ids)
            for key, rendition in renditions.items():
                media_delete.assert_any_call(rendition['media'])

    def _get_original(self, _id):
        return self.app.data.find_one(ARCHIVE, None, _id=_id)
Example #42
    def fetch(self, docs, id=None, **kwargs):
        id_of_fetched_items = []

        for doc in docs:
            id_of_item_to_be_fetched = doc.get(
                config.ID_FIELD) if id is None else id

            desk_id = doc.get('desk')
            stage_id = doc.get('stage')

            ingest_service = get_resource_service('ingest')
            ingest_doc = ingest_service.find_one(req=None,
                                                 _id=id_of_item_to_be_fetched)

            if not ingest_doc:
                raise SuperdeskApiError.notFoundError(
                    _('Failed to find ingest item with _id: {id}').format(
                        id=id_of_item_to_be_fetched))

            if not is_workflow_state_transition_valid('fetch_from_ingest',
                                                      ingest_doc[ITEM_STATE]):
                raise InvalidStateTransitionError()

            if doc.get('macro'):  # there is a macro so transform it
                ingest_doc = get_resource_service('macros').execute_macro(
                    ingest_doc, doc.get('macro'))

            archived = utcnow()
            ingest_service.patch(id_of_item_to_be_fetched,
                                 {'archived': archived})

            dest_doc = dict(ingest_doc)

            if doc.get('target'):
                dest_doc.update(doc.get('target'))

            new_id = generate_guid(type=GUID_TAG)
            id_of_fetched_items.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            dest_doc[GUID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            dest_doc[config.VERSION] = 1
            dest_doc['versioncreated'] = archived
            send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[FAMILY_ID] = ingest_doc[config.ID_FIELD]
            dest_doc[INGEST_ID] = self.__strip_version_from_guid(
                ingest_doc[GUID_FIELD], ingest_doc.get('version'))
            dest_doc[INGEST_VERSION] = ingest_doc.get('version')
            dest_doc[ITEM_OPERATION] = ITEM_FETCH

            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)
            self.__fetch_items_in_package(
                dest_doc, desk_id, stage_id,
                doc.get(ITEM_STATE, CONTENT_STATE.FETCHED))

            self.__fetch_associated_items(
                dest_doc, desk_id, stage_id,
                doc.get(ITEM_STATE, CONTENT_STATE.FETCHED))

            desk = get_resource_service('desks').find_one(req=None,
                                                          _id=desk_id)
            if desk and desk.get('default_content_profile'):
                dest_doc['profile'] = desk['default_content_profile']

            if dest_doc.get('type', 'text') in MEDIA_TYPES:
                dest_doc['profile'] = None

            get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(doc=dest_doc)
            build_custom_hateoas(custom_hateoas, dest_doc)
            superdesk.item_fetched.send(self,
                                        item=dest_doc,
                                        ingest_item=ingest_doc)
            doc.update(dest_doc)

        if kwargs.get('notify', True):
            ingest_doc.update({'task': dest_doc.get('task')})
            push_item_move_notification(ingest_doc, doc, 'item:fetch')

        return id_of_fetched_items
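    # Illustrative input for fetch() above; the ids and the service handle are
    # assumptions for the sketch, not a documented API:
    #
    #   docs = [{'_id': 'ingest-item-1', 'desk': 'desk-id', 'stage': 'stage-id'}]
    #   new_ids = fetch_service.fetch(docs)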
Example #43
    def _can_remove_item(self, item, processed_item=None):
        """Recursively checks if the item can be removed.

        :param dict item: item to be removed
        :param dict processed_item: items already processed, keyed by item id
        :return: True if item can be removed, False otherwise.
        """

        if processed_item is None:
            processed_item = dict()

        item_refs = []
        package_service = PackageService()
        archive_service = get_resource_service(ARCHIVE)

        if item.get(ITEM_TYPE) == CONTENT_TYPE.COMPOSITE:
            # Get the item references for this package
            item_refs = package_service.get_residrefs(item)

        if item.get(ITEM_TYPE) in [
                CONTENT_TYPE.TEXT, CONTENT_TYPE.PREFORMATTED
        ]:
            broadcast_items = get_resource_service(
                'archive_broadcast').get_broadcast_items_from_master_story(
                    item)
            # If master story expires then check if broadcast item is included in a package.
            # If included in a package then check the package expiry.
            item_refs.extend([
                broadcast_item.get(config.ID_FIELD)
                for broadcast_item in broadcast_items
            ])

            if item.get('rewrite_of'):
                item_refs.append(item.get('rewrite_of'))

            if item.get('rewritten_by'):
                item_refs.append(item.get('rewritten_by'))

        # get the list of associated item ids
        if item.get(ITEM_TYPE) in MEDIA_TYPES:
            item_refs.extend(self._get_associated_items(item))

        # get the ids of packages in which this item is referenced
        item_refs.extend(package_service.get_linked_in_package_ids(item))

        # check whether the item itself has expired
        is_expired = item.get('expiry') and item.get('expiry') < utcnow()

        if is_expired:
            # now check recursively for all references
            if item.get(config.ID_FIELD) in processed_item:
                return is_expired

            processed_item[item.get(config.ID_FIELD)] = item
            if item_refs:
                archive_items = archive_service.get_from_mongo(
                    req=None, lookup={'_id': {
                        '$in': item_refs
                    }})
                for archive_item in archive_items:
                    is_expired = self._can_remove_item(archive_item,
                                                       processed_item)
                    if not is_expired:
                        break

        return is_expired
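    # The processed_item dict above acts as a visited set guarding against
    # reference cycles (e.g. two packages that refer to each other). The same
    # recursion in isolation (get_item/get_refs are hypothetical callables,
    # utcnow as used above):
    #
    #   def can_remove(item, get_item, get_refs, visited=None):
    #       visited = visited if visited is not None else set()
    #       if not item.get('expiry') or item['expiry'] >= utcnow():
    #           return False  # not expired, so not removable
    #       if item['_id'] in visited:
    #           return True  # already checked on this path; stop recursing
    #       visited.add(item['_id'])
    #       return all(can_remove(get_item(ref), get_item, get_refs, visited)
    #                  for ref in get_refs(item))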
Example #44
 def test_is_old_content(self):
     service = FileFeedingService()
     self.assertFalse(service.is_old_content(utcnow()))
     self.assertTrue(
         service.is_old_content(utcnow() - timedelta(minutes=11)))
Example #45
 def test_validate_schedule_date_with_datetime_in_past_raises_superdeskApiError(
         self):
     self.assertRaises(SuperdeskApiError, validate_schedule,
                       utcnow() + timedelta(hours=-2))
Example #46
    def _get_planning_date_filters(self, request):
        """Get date filters for planning resource

        :param request: object representing the HTTP request
        """
        params = request.args or MultiDict()
        date_filter_param, start_date, end_date = self._parse_date_params(params)
        if not (date_filter_param or start_date or end_date):
            return {
                'nested': {
                    'path': '_planning_schedule',
                    'filter': {
                        'range': {
                            '_planning_schedule.scheduled': {
                                'gte': 'now/d',
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            }
                        }
                    }
                }
            }

        start_of_week = self._get_start_of_week(params)
        date_filters = {
            'range': {
                '_planning_schedule.scheduled': {
                    'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                }
            }
        }

        # guard against date_filter_param being None when only start/end dates are supplied
        date_filter = (date_filter_param or '').lower()
        if date_filter == 'today':
            date_filters['range']['_planning_schedule.scheduled']['gte'] = 'now/d'
            date_filters['range']['_planning_schedule.scheduled']['lt'] = 'now+24h/d'
        elif date_filter == 'tomorrow':
            date_filters['range']['_planning_schedule.scheduled']['gte'] = 'now+24h/d'
            date_filters['range']['_planning_schedule.scheduled']['lt'] = 'now+48h/d'
        elif date_filter == 'this_week':
            end_of_this_week = get_start_of_next_week(None, start_of_week)
            start_of_this_week = end_of_this_week - timedelta(days=7)

            date_filters['range']['_planning_schedule.scheduled']['gte'] = \
                '{}||/d'.format(start_of_this_week.strftime(config.ELASTIC_DATE_FORMAT))
            date_filters['range']['_planning_schedule.scheduled']['lt'] = \
                '{}||/d'.format(end_of_this_week.strftime(config.ELASTIC_DATE_FORMAT))
        elif date_filter == 'next_week':
            start_of_next_week = get_start_of_next_week(None, start_of_week)
            end_of_next_week = start_of_next_week + timedelta(days=7)

            date_filters['range']['_planning_schedule.scheduled']['gte'] = \
                '{}||/d'.format(start_of_next_week.strftime(config.ELASTIC_DATE_FORMAT))
            date_filters['range']['_planning_schedule.scheduled']['lt'] = \
                '{}||/d'.format(end_of_next_week.strftime(config.ELASTIC_DATE_FORMAT))
        else:
            if start_date:
                date_filters['range']['_planning_schedule.scheduled']['gte'] = start_date
            if end_date:
                date_filters['range']['_planning_schedule.scheduled']['lte'] = end_date

        return {
            'nested': {
                'path': '_planning_schedule',
                'filter': date_filters,
            }
        }
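    # For date_filter_param == 'tomorrow' the method above returns, roughly:
    #
    #   {'nested': {
    #       'path': '_planning_schedule',
    #       'filter': {'range': {'_planning_schedule.scheduled': {
    #           'time_zone': '+00:00',  # actual offset depends on DEFAULT_TIMEZONE
    #           'gte': 'now+24h/d',
    #           'lt': 'now+48h/d'}}}}}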
Example #47
 def test_validate_schedule_at_utc_zero_hours(self):
     validate_schedule(
         (utcnow() + timedelta(days=1)).replace(hour=0,
                                                minute=0,
                                                second=0,
                                                microsecond=0))
Example #48
 def test_text_formatter(self):
     embargo_ts = (utcnow() + timedelta(days=2))
     article = {
         '_id': 'tag:aap.com.au:20150613:12345',
         'guid': 'tag:aap.com.au:20150613:12345',
         '_current_version': 1,
         'anpa_category': [{'qcode': 'a'}],
         'source': 'AAP',
         'headline': 'This is a test headline',
         'byline': 'joe',
         'slugline': 'slugline',
         'subject': [{'qcode': '02011001', 'name': 'international court or tribunal', 'parent': None},
                     {'qcode': '02011002', 'name': 'extradition'}],
         'anpa_take_key': 'take_key',
         'unique_id': '1',
         'body_html': 'The story body',
         'type': 'text',
         'word_count': '1',
         'priority': 1,
         'profile': 'snap',
         'state': 'published',
         'urgency': 2,
         'pubstatus': 'usable',
         'creditline': 'sample creditline',
         'keywords': ['traffic'],
         'abstract': '<p>sample <b>abstract</b></p>',
         'place': [{'name': 'Australia', 'qcode': 'NSW'}],
         'embargo': embargo_ts,
         'body_footer': '<p>call helpline 999 if you are planning to quit smoking</p>',
         'company_codes': [{'name': 'YANCOAL AUSTRALIA LIMITED', 'qcode': 'YAL',
                            'security_exchange': 'ASX'}],
         'genre': [{'name': 'Article', 'qcode': 'article'}],
         'flags': {'marked_for_legal': True},
         'extra': {'foo': 'test'},
     }
     seq, doc = self.formatter.format(article,
                                      {'name': 'Test Subscriber'})[0]
     expected = {
         'guid': 'tag:aap.com.au:20150613:12345',
         'version': '1',
         'place': [{'name': 'Australia', 'code': 'NSW'}],
         'pubstatus': 'usable',
         'body_html': 'The story body<p>call helpline 999 if you are planning to quit smoking</p>',
         'type': 'text',
         'subject': [{'code': '02011001', 'name': 'international court or tribunal'},
                     {'code': '02011002', 'name': 'extradition'}],
         'service': [{'code': 'a'}],
         'source': 'AAP',
         'headline': 'This is a test headline',
         'byline': 'joe',
         'urgency': 2,
         'priority': 1,
         'embargoed': embargo_ts.isoformat(),
         'profile': 'snap',
         'slugline': 'slugline',
         'description_text': 'sample abstract',
         'description_html': '<p>sample <b>abstract</b></p>',
         'keywords': ['traffic'],
         'organisation': [{'name': 'YANCOAL AUSTRALIA LIMITED', 'rel': 'Securities Identifier',
                           'symbols': [{'ticker': 'YAL', 'exchange': 'ASX'}]}],
         'genre': [{'name': 'Article', 'code': 'article'}],
         'signal': [{'name': 'Content Warning', 'code': 'cwarn',
                     'scheme': 'http://cv.iptc.org/newscodes/signal/'}],
         'extra': {'foo': 'test'},
     }
     self.assertEqual(json.loads(doc), expected)
Example #49
    def setUp(self):
        self.req = ParsedRequest()
        with self.app.test_request_context(self.app.config.get('URL_PREFIX')):
            self.articles = [{
                '_id': '1',
                'urgency': 1,
                'headline': 'story',
                'state': 'fetched'
            }, {
                '_id': '2',
                'headline': 'prtorque',
                'state': 'fetched'
            }, {
                '_id': '3',
                'urgency': 3,
                'state': 'fetched',
                'flags': {
                    'marked_for_sms': True
                }
            }, {
                '_id': '4',
                'urgency': 4,
                'state': 'fetched',
                'task': {
                    'desk': '1'
                },
                'ingest_provider': '1'
            }, {
                '_id': '5',
                'urgency': 2,
                'state': 'fetched',
                'task': {
                    'desk': '2'
                },
                'priority': 3
            }, {
                '_id': '6',
                'state': 'fetched',
                'embargo': utcnow(),
                'schedule_settings': {
                    'utc_embargo': utcnow() + timedelta(minutes=20)
                }
            }, {
                '_id': '7',
                'genre': [{
                    'name': 'Sidebar'
                }],
                'state': 'fetched'
            }, {
                '_id': '8',
                'subject': [{
                    'name': 'adult education',
                    'qcode': '05001000',
                    'parent': '05000000'
                }, {
                    'name': 'high schools',
                    'qcode': '05005003',
                    'parent': '05005000'
                }],
                'state': 'fetched'
            }, {
                '_id': '9',
                'state': 'fetched',
                'anpa_category': [{
                    'qcode': 'a',
                    'name': 'Aus News'
                }]
            }, {
                '_id': '10',
                'body_html': '<p>Mention<p>',
                'embargo': utcnow(),
                'schedule_settings': {
                    'utc_embargo': utcnow() - timedelta(minutes=20)
                }
            }, {
                '_id': '11',
                'place': [{
                    'qcode': 'NSW',
                    'name': 'NSW'
                }],
                'state': 'fetched'
            }, {
                '_id': '12',
                'body_html': '<div>&#13;\n&#13;\n<body dir=\"ltr\">&#13;\n<div>&#13;\n'
                             '<span>SDA</span><br/>&#13;\n</div>&#13;\n&#13;\n</body>'
                             '&#13;\n</div>',
                'embargo': utcnow()
            }]

            self.app.data.insert('archive', self.articles)

            self.app.data.insert('filter_conditions', [{
                '_id': 1,
                'field': 'headline',
                'operator': 'like',
                'value': 'tor',
                'name': 'test-1'
            }])
            self.app.data.insert('filter_conditions', [{
                '_id': 2,
                'field': 'urgency',
                'operator': 'in',
                'value': '2',
                'name': 'test-2'
            }])
            self.app.data.insert('filter_conditions', [{
                '_id': 3,
                'field': 'urgency',
                'operator': 'in',
                'value': '3,4,5',
                'name': 'test-2'
            }])
            self.app.data.insert('filter_conditions', [{
                '_id': 4,
                'field': 'urgency',
                'operator': 'nin',
                'value': '1,2,3',
                'name': 'test-2'
            }])
            self.app.data.insert('filter_conditions', [{
                '_id': 5,
                'field': 'urgency',
                'operator': 'in',
                'value': '2,5',
                'name': 'test-2'
            }])
            self.app.data.insert(
                'content_filters',
                [{
                    "_id": 1,
                    "content_filter": [{
                        "expression": {
                            "fc": [1]
                        }
                    }],
                    "name": "soccer-only"
                }])
Example #50
    def init_data(self):
        self.users = [{'_id': '1', 'username': '******'}]
        self.desks = [{
            '_id': ObjectId('123456789ABCDEF123456789'),
            'name': 'desk1'
        }]
        self.products = [{
            "_id": "1",
            "name": "prod1",
            "geo_restrictions": "NSW",
            "email": "*****@*****.**"
        }, {
            "_id": "2",
            "name": "prod2",
            "codes": "abc,def,"
        }, {
            "_id": "3",
            "name": "prod3",
            "codes": "xyz"
        }]
        self.subscribers = [{
            "_id":
            "1",
            "name":
            "sub1",
            "is_active":
            True,
            "subscriber_type":
            SUBSCRIBER_TYPES.WIRE,
            "media_type":
            "media",
            "sequence_num_settings": {
                "max": 10,
                "min": 1
            },
            "email":
            "*****@*****.**",
            "products": ["1"],
            "destinations": [{
                "name": "dest1",
                "format": "nitf",
                "delivery_type": "ftp",
                "config": {
                    "address": "127.0.0.1",
                    "username": "******"
                }
            }]
        }, {
            "_id":
            "2",
            "name":
            "sub2",
            "is_active":
            True,
            "subscriber_type":
            SUBSCRIBER_TYPES.WIRE,
            "media_type":
            "media",
            "sequence_num_settings": {
                "max": 10,
                "min": 1
            },
            "email":
            "*****@*****.**",
            "products": ["1"],
            "destinations": [{
                "name": "dest2",
                "format": "nitf",
                "delivery_type": "filecopy",
                "config": {
                    "address": "/share/copy"
                }
            }, {
                "name": "dest3",
                "format": "nitf",
                "delivery_type": "Email",
                "config": {
                    "recipients": "*****@*****.**"
                }
            }]
        }, {
            "_id":
            "3",
            "name":
            "sub3",
            "is_active":
            True,
            "subscriber_type":
            SUBSCRIBER_TYPES.DIGITAL,
            "media_type":
            "media",
            "sequence_num_settings": {
                "max": 10,
                "min": 1
            },
            "email":
            "*****@*****.**",
            "products": ["1"],
            "destinations": [{
                "name": "dest1",
                "format": "nitf",
                "delivery_type": "ftp",
                "config": {
                    "address": "127.0.0.1",
                    "username": "******"
                }
            }]
        }, {
            "_id":
            "4",
            "name":
            "sub4",
            "is_active":
            True,
            "subscriber_type":
            SUBSCRIBER_TYPES.WIRE,
            "media_type":
            "media",
            "sequence_num_settings": {
                "max": 10,
                "min": 1
            },
            "products": ["1"],
            "destinations": [{
                "name": "dest1",
                "format": "nitf",
                "delivery_type": "ftp",
                "config": {
                    "address": "127.0.0.1",
                    "username": "******"
                }
            }]
        }, {
            "_id":
            "5",
            "name":
            "sub5",
            "is_active":
            True,
            "subscriber_type":
            SUBSCRIBER_TYPES.ALL,
            "media_type":
            "media",
            "sequence_num_settings": {
                "max": 10,
                "min": 1
            },
            "email":
            "*****@*****.**",
            "codes":
            "xyz,  klm",
            "products": ["1", "2"],
            "destinations": [{
                "name": "dest1",
                "format": "ninjs",
                "delivery_type": "ftp",
                "config": {
                    "address": "127.0.0.1",
                    "username": "******"
                }
            }]
        }]

        self.articles = [{
            'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
            '_id': '1',
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'last_version': 3,
            config.VERSION: 4,
            'body_html': 'Test body',
            'anpa_category': [{
                'qcode': 'A',
                'name': 'Sport'
            }],
            'urgency': 4,
            'headline': 'Two students missing',
            'pubstatus': 'usable',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'ednote': 'Andrew Marwood contributed to this article',
            'dateline': {
                'located': {
                    'city': 'Sydney'
                }
            },
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{
                'qcode': '17004000',
                'name': 'Statistics'
            }, {
                'qcode': '04001002',
                'name': 'Weather'
            }],
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_STATE: CONTENT_STATE.PUBLISHED,
            'expiry': utcnow() + timedelta(minutes=20),
            'slugline': 'story slugline',
            'unique_name': '#1'
        }, {
            'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a974-xy4532fe33f9',
            '_id': '2',
            'last_version': 3,
            config.VERSION: 4,
            'body_html': 'Test body of the second article',
            'slugline': 'story slugline',
            'urgency': 4,
            'anpa_category': [{
                'qcode': 'A',
                'name': 'Sport'
            }],
            'headline': 'Another two students missing',
            'pubstatus': 'usable',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'ednote': 'Andrew Marwood contributed to this article',
            'dateline': {
                'located': {
                    'city': 'Sydney'
                }
            },
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{
                'qcode': '17004000',
                'name': 'Statistics'
            }, {
                'qcode': '04001002',
                'name': 'Weather'
            }],
            'expiry': utcnow() + timedelta(minutes=20),
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_STATE: CONTENT_STATE.PROGRESS,
            'publish_schedule': "2016-05-30T10:00:00+0000",
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#2'
        }, {
            'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4fa',
            '_id': '3',
            'last_version': 3,
            config.VERSION: 4,
            'body_html': 'Test body',
            'slugline': 'story slugline',
            'urgency': 4,
            'anpa_category': [{
                'qcode': 'A',
                'name': 'Sport'
            }],
            'headline': 'Two students missing killed',
            'pubstatus': 'usable',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'ednote': 'Andrew Marwood contributed to this article killed',
            'dateline': {
                'located': {
                    'city': 'Sydney'
                }
            },
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{
                'qcode': '17004000',
                'name': 'Statistics'
            }, {
                'qcode': '04001002',
                'name': 'Weather'
            }],
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_STATE: CONTENT_STATE.KILLED,
            'expiry': utcnow() + timedelta(minutes=20),
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#3'
        }, {
            'guid': '8',
            '_id': '8',
            'last_version': 3,
            config.VERSION: 4,
            'target_regions': [{
                'qcode': 'NSW',
                'name': 'New South Wales',
                'allow': True
            }],
            'body_html': 'Take-1 body',
            'urgency': 4,
            'headline': 'Take-1 headline',
            'abstract': 'Abstract for take-1',
            'anpa_category': [{
                'qcode': 'A',
                'name': 'Sport'
            }],
            'pubstatus': 'done',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'dateline': {
                'located': {
                    'city': 'Sydney'
                }
            },
            'slugline': 'taking takes',
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{
                'qcode': '17004000',
                'name': 'Statistics'
            }, {
                'qcode': '04001002',
                'name': 'Weather'
            }],
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_STATE: CONTENT_STATE.PROGRESS,
            'expiry': utcnow() + timedelta(minutes=20),
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#8'
        }, {
            '_id': '9',
            'urgency': 3,
            'headline': 'creator',
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_STATE: CONTENT_STATE.FETCHED
        }, {
            'guid': 'tag:localhost:2015:69b961ab-a7b402fed4fb',
            '_id': 'test_item_9',
            'last_version': 3,
            config.VERSION: 4,
            'body_html': 'Student Crime. Police Missing.',
            'urgency': 4,
            'headline': 'Police Missing',
            'abstract': 'Police Missing',
            'anpa_category': [{
                'qcode': 'A',
                'name': 'Australian General News'
            }],
            'pubstatus': 'usable',
            'firstcreated': utcnow(),
            'byline': 'By Alan Karben',
            'dateline': {
                'located': {
                    'city': 'Sydney'
                }
            },
            'slugline': 'Police Missing',
            'keywords': ['Student', 'Crime', 'Police', 'Missing'],
            'subject': [{
                'qcode': '17004000',
                'name': 'Statistics'
            }, {
                'qcode': '04001002',
                'name': 'Weather'
            }],
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_STATE: CONTENT_STATE.PROGRESS,
            ITEM_TYPE: CONTENT_TYPE.TEXT,
            'unique_name': '#9'
        }, {
            'guid': 'tag:localhost:10:10:10:2015:69b961ab-2816-4b8a-a584-a7b402fed4fc',
            '_id': '100',
            config.VERSION: 3,
            'task': {
                'user': '******',
                'desk': '123456789ABCDEF123456789'
            },
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'groups': [{
                'id': 'root',
                'refs': [{
                    'idRef': 'main'
                }],
                'role': 'grpRole:NEP'
            }, {
                'id': 'main',
                'refs': [{
                    'location': ARCHIVE,
                    ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
                    RESIDREF: '6'
                }],
                'role': 'grpRole:main'
            }],
            'firstcreated': utcnow(),
            'expiry': utcnow() + timedelta(minutes=20),
            'unique_name': '#100',
            ITEM_STATE: CONTENT_STATE.PROGRESS
        }]
class AsiaNetFeedParserTestCase(TestCase):
    filename = 'asianet_{}.tst'
    year = utcnow().year

    headers = [{
        'headline': 'Media Release: Digital Turbine, Inc.',
        'anpa_take_key': 'Digital Turbine, Inc.',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67276 Digital Turbine Partners with'
    }, {
        'headline': 'Media Release: Queen Elizabeth Prize',
        'anpa_take_key': 'Queen Elizabeth Prize',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67254 Queen Elizabeth Prize'
    }, {
        'headline': 'Media Release: Escola Aguia de Ouro',
        'anpa_take_key': 'Escola Aguia de Ouro',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67255 Animal rights come to Brazil'
    }, {
        'headline': 'Media Release: Essence',
        'anpa_take_key': 'Essence',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67257 Digital Agency Essence Builds on Enormous Growth'
    }, {
        'headline': 'Media Release: OMRON Corporation',
        'anpa_take_key': 'OMRON Corporation',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67261 OMRON Launches Promotional Website for AI-equipped'
    }, {
        'headline': 'Media Release: OnApp',
        'anpa_take_key': 'OnApp',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67266 OnApp v5.3 Simplifies Add-on Services'
    }, {
        'headline': 'Media Release: Shinetech Software Inc.',
        'anpa_take_key': 'Shinetech Software Inc.',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67271 Shinetech Software, Inc. Reports 16% Growth in 2016'
    }, {
        'headline': 'Media Release: Huntsman Family Investments',
        'anpa_take_key': 'Huntsman Family Investme',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67275 Huntsman Family Investments to Acquire GTA TeleGuam'
    }, {
        'headline': 'Media Release: Neovia Oncology Ltd',
        'anpa_take_key': 'Neovia Oncology Ltd',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR67278 Neovia Enrolls First Patient in Cancer Trial'
    }, {
        'headline': 'Media Release: IndiGrid',
        'anpa_take_key': 'IndiGrid',
        'original_source': 'AsiaNet',
        'first_line': '<p>MEDIA RELEASE PR74541 IndiGrid Delivers Another Strong Quarter'
    }]

    def setUp(self):
        self.provider = {'name': 'Test'}
        self.maxDiff = None

    def test_can_parse(self):
        for i in range(1, 11):
            self.assertTrue(AsiaNetFeedParser().can_parse(
                self._get_fixture(i)))

    def test_feed_parser(self):
        test_keys = ['headline', 'anpa_take_key', 'original_source']
        for i in range(1, 11):
            item = AsiaNetFeedParser().parse(self._get_fixture(i),
                                             self.provider)
            expected = self.headers[i - 1]

            for key in test_keys:
                self.assertEqual(item[key], expected[key])

            self.assertGreater(item['word_count'], 0)

            # This tests for the body content, as well as HTML escaping
            self.assertTrue(item['body_html'].startswith(
                expected['first_line']))

    def _get_fixture(self, index):
        dirname = os.path.dirname(os.path.realpath(__file__))
        return os.path.normpath(
            os.path.join(dirname, '../fixtures', self.filename.format(index)))
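    # The tests above expect ten fixtures, 'asianet_1.tst' through
    # 'asianet_10.tst', in the sibling 'fixtures' directory.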
Example #52
    def _get_events_date_filters(self, request):
        """Get date filters for events resource

        :param request: object representing the HTTP request
        """
        params = request.args or MultiDict()
        date_filter_param, start_date, end_date = self._parse_date_params(params)
        if not (date_filter_param or start_date or end_date):
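            # No date parameters given: default to events that have not yet
            # finished (end date today or later in the configured timezone).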
            return {
                'range': {
                    'dates.end': {
                        'gte': 'now/d',
                        'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                    }
                }
            }

        start_of_week = self._get_start_of_week(params)
        date_filters = []

        def get_pre_defined_date_filter(start, end):
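            # An event overlaps [start, end) if it starts in the range, ends in
            # the range, or starts before and ends after it; the three clauses
            # appended below cover those cases.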
            filter_list = []
            filter_list.append({
                'range': {
                    'dates.start': {
                        'gte': start,
                        'lt': end,
                        'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                    }
                }
            })
            filter_list.append({
                'range': {
                    'dates.end': {
                        'gte': start,
                        'lt': end,
                        'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                    }
                }
            })

            filter_list.append({
                'and': {
                    'filters': [
                        {
                            'range': {
                                'dates.start': {
                                    'lt': start,
                                    'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                                },
                            },
                        },
                        {
                            'range': {
                                'dates.end': {
                                    'gt': end,
                                    'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                                },
                            },
                        },
                    ],
                },
            })
            return filter_list

        date_filter_param = (date_filter_param or '').lower()
        if date_filter_param == 'today':
            date_filters = get_pre_defined_date_filter('now/d', 'now+24h/d')
        elif date_filter_param == 'tomorrow':
            date_filters = get_pre_defined_date_filter('now+24h/d', 'now+48h/d')
        elif date_filter_param == 'this_week':
            end_of_this_week = get_start_of_next_week(None, start_of_week)
            start_of_this_week = end_of_this_week - timedelta(days=7)

            date_filters = get_pre_defined_date_filter(
                '{}||/d'.format(start_of_this_week.strftime(config.ELASTIC_DATE_FORMAT)),
                '{}||/d'.format(end_of_this_week.strftime(config.ELASTIC_DATE_FORMAT))
            )
        elif date_filter_param == 'next_week':
            start_of_next_week = get_start_of_next_week(None, start_of_week)
            end_of_next_week = start_of_next_week + timedelta(days=7)

            date_filters = get_pre_defined_date_filter(
                '{}||/d'.format(start_of_next_week.strftime(config.ELASTIC_DATE_FORMAT)),
                '{}||/d'.format(end_of_next_week.strftime(config.ELASTIC_DATE_FORMAT))
            )
        else:
            if start_date and not end_date:
                date_filters.extend([
                    {
                        'range': {
                            'dates.start': {
                                'gte': start_date,
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            },
                        },
                    },
                    {
                        'range': {
                            'dates.end': {
                                'gte': start_date,
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            },
                        },
                    }
                ])
            elif not start_date and end_date:
                date_filters.extend([
                    {
                        'range': {
                            'dates.end': {
                                'lte': end_date,
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            },
                        },
                    },
                    {
                        'range': {
                            'dates.start': {
                                'lte': end_date,
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            },
                        },
                    }
                ])
            else:
                date_filters.extend([
                    {
                        'range': {
                            'dates.start': {
                                'gte': start_date,
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            },
                            'dates.end': {
                                'lte': end_date,
                                'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                            },
                        },
                    },
                    {
                        'and': {
                            'filters': [
                                {
                                    'range': {
                                        'dates.start': {
                                            'lt': start_date,
                                            'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                                        },
                                    },
                                },
                                {
                                    'range': {
                                        'dates.end': {
                                            'gt': end_date,
                                            'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                                        },
                                    },
                                },
                            ],
                        },
                    },
                    {
                        'or': {
                            'filters': [
                                {
                                    'range': {
                                        'dates.start': {
                                            'gte': start_date,
                                            'lte': end_date,
                                            'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                                        },
                                    },
                                },
                                {
                                    'range': {
                                        'dates.end': {
                                            'gte': start_date,
                                            'lte': end_date,
                                            'time_zone': get_timezone_offset(config.DEFAULT_TIMEZONE, utcnow())
                                        },
                                    },
                                },
                            ],
                        },
                    }
                ])

        return {
            'or': {
                'filters': date_filters
            }
        }
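    # A minimal usage sketch (hypothetical values; the 'date_filter' parameter
    # name is an assumption, since _parse_date_params is not shown here):
    #
    #   request.args = MultiDict([('date_filter', 'today')])
    #   filters = self._get_events_date_filters(request)
    #   # -> {'or': {'filters': [<starts today>, <ends today>, <spans today>]}}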
Example #53
 def test_products(self):
     self.app.data.insert(
         'content_filters',
         [{
             "_id": 3,
             "content_filter": [{
                 "expression": {
                     "pf": [1],
                     "fc": [2]
                 }
             }],
             "name": "soccer-only3"
         }])
     self.app.data.insert('filter_conditions', [{
         '_id': 1,
         'field': 'headline',
         'operator': 'like',
         'value': 'test',
         'name': 'test-1'
     }])
     self.app.data.insert('filter_conditions', [{
         '_id': 2,
         'field': 'urgency',
         'operator': 'in',
         'value': '2',
         'name': 'test-2'
     }])
     self.app.data.insert('products', [{
         "_id": 1,
         "content_filter": {
             "filter_id": 3,
             "filter_type": "permitting"
         },
         "name": "p-1",
         "product_type": "api"
     }])
     self.app.data.insert('vocabularies', [{
         "_id":
         "locators",
         "display_name":
         "Locators",
         "type":
         "unmanageable",
         "unique_field":
         "qcode",
         "items": [
             {
                 "is_active": True,
                 "name": "NSW",
                 "qcode": "NSW",
                 "state": "New South Wales",
                 "country": "Australia",
                 "world_region": "Oceania",
                 "group": "Australia"
             },
         ],
     }])
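     # The 'locators' vocabulary above lets the formatter expand the article's
     # place qcode 'NSW' into 'New South Wales' in the expected output below.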
     embargo_ts = utcnow() + timedelta(days=2)
     article = {
         '_id': 'tag:aap.com.au:20150613:12345',
         'guid': 'tag:aap.com.au:20150613:12345',
         '_current_version': 1,
         'anpa_category': [{
             'qcode': 'a'
         }],
         'source': 'AAP',
         'headline': 'This is a test headline',
         'byline': 'joe',
         'slugline': 'slugline',
         'subject': [{
             'qcode': '02011001',
             'name': 'international court or tribunal',
             'parent': None
         }, {
             'qcode': '02011002',
             'name': 'extradition'
         }],
         'anpa_take_key': 'take_key',
         'unique_id': '1',
         'body_html': 'The story body',
         'type': 'text',
         'word_count': '1',
         'priority': 1,
         'profile': 'snap',
         'state': 'published',
         'urgency': 2,
         'pubstatus': 'usable',
         'creditline': 'sample creditline',
         'keywords': ['traffic'],
         'abstract': '<p>sample <b>abstract</b></p>',
         'place': [{
             'name': 'NSW',
             'qcode': 'NSW'
         }],
         'embargo': embargo_ts,
         'body_footer': '<p>call helpline 999 if you are planning to quit smoking</p>',
         'company_codes': [{
             'name': 'YANCOAL AUSTRALIA LIMITED',
             'qcode': 'YAL',
             'security_exchange': 'ASX'
         }],
         'genre': [{
             'name': 'Article',
             'qcode': 'article'
         }],
         'flags': {
             'marked_for_legal': True
         },
         'extra': {
             'foo': 'test'
         },
         'operation': 'publish'
     }
     seq, doc = self.formatter.format(article,
                                      {'name': 'Test Subscriber'})[0]
     expected = {
         "guid":
         "tag:aap.com.au:20150613:12345",
         "version":
         "1",
         "place": [{
             "code": "NSW",
             "name": "New South Wales"
         }],
         "pubstatus":
         "usable",
         "body_html":
         "The story body<p>call helpline 999 if you are planning to quit smoking</p>",
         "type":
         "text",
         "subject": [{
             "code": "02011001",
             "name": "international court or tribunal"
         }, {
             "code": "02011002",
             "name": "extradition"
         }],
         "service": [{
             "code": "a"
         }],
         "source":
         "AAP",
         "headline":
         "This is a test headline",
         "byline":
         "joe",
         "urgency":
         2,
         "priority":
         1,
         "embargoed":
         embargo_ts.isoformat(),
         "profile":
         "snap",
         "slugline":
         "slugline",
         "description_text":
         "sample abstract",
         "description_html":
         "<p>sample <b>abstract</b></p>",
         'keywords': ['traffic'],
         'organisation': [{
             'name': 'YANCOAL AUSTRALIA LIMITED',
             'rel': 'Securities Identifier',
             'symbols': [{
                 'ticker': 'YAL',
                 'exchange': 'ASX'
             }]
         }],
         'genre': [{
             'name': 'Article',
             'code': 'article'
         }],
         'signal': [{
             'name': 'Content Warning',
             'code': 'cwarn',
             'scheme': 'http://cv.iptc.org/newscodes/signal/'
         }],
         'extra': {
             'foo': 'test'
         },
         'charcount': 67,
         'wordcount': 13,
         'readtime': 0,
         'products': [{
             'code': 1,
             'name': 'p-1'
         }]
     }
     self.assertEqual(json.loads(doc), expected)
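     # Dropping urgency to 1 stops filter condition 2 (urgency in [2]) from
     # matching, so the second expected payload has an empty 'products' list.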
     article['urgency'] = 1
     seq, doc = self.formatter.format(article,
                                      {'name': 'Test Subscriber'})[0]
     expected = {
         "guid":
         "tag:aap.com.au:20150613:12345",
         "version":
         "1",
         "place": [{
             "code": "NSW",
             "name": "New South Wales"
         }],
         "pubstatus":
         "usable",
         "body_html":
         "The story body<p>call helpline 999 if you are planning to quit smoking</p>",
         "type":
         "text",
         "subject": [{
             "code": "02011001",
             "name": "international court or tribunal"
         }, {
             "code": "02011002",
             "name": "extradition"
         }],
         "service": [{
             "code": "a"
         }],
         "source":
         "AAP",
         "headline":
         "This is a test headline",
         "byline":
         "joe",
         "urgency":
         1,
         "priority":
         1,
         "embargoed":
         embargo_ts.isoformat(),
         "profile":
         "snap",
         "slugline":
         "slugline",
         "description_text":
         "sample abstract",
         "description_html":
         "<p>sample <b>abstract</b></p>",
         'keywords': ['traffic'],
         'organisation': [{
             'name': 'YANCOAL AUSTRALIA LIMITED',
             'rel': 'Securities Identifier',
             'symbols': [{
                 'ticker': 'YAL',
                 'exchange': 'ASX'
             }]
         }],
         'genre': [{
             'name': 'Article',
             'code': 'article'
         }],
         'signal': [{
             'name': 'Content Warning',
             'code': 'cwarn',
             'scheme': 'http://cv.iptc.org/newscodes/signal/'
         }],
         'extra': {
             'foo': 'test'
         },
         'charcount': 67,
         'wordcount': 13,
         'readtime': 0,
         'products': []
     }
     self.assertEqual(json.loads(doc), expected)
Example #54
 def test_filter_expired_items(self):
     provider, provider_service = self.setup_reuters_provider()
     items = provider_service.fetch_ingest(reuters_guid)
     for item in items[:4]:
         item['expiry'] = utcnow() + timedelta(minutes=11)
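     # Only the first four items receive a future expiry, so exactly four
     # survive the expiry filter.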
     self.assertEqual(4, len(ingest.filter_expired_items(provider, items)))
 def _get_date(self, article, field):
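     # Falls back to the current time when the article has no value for the
     # field, and converts it to the configured local timezone (or UTC).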
     return utc_to_local(config.DEFAULT_TIMEZONE or 'UTC',
                         article.get(field) or utcnow())
Example #56
def prepopulate_data(file_name, default_user=get_default_user(), directory=None):
    if not directory:
        directory = os.path.abspath(os.path.dirname(__file__))
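    # 'NOW()' tokens in the fixture data are replaced with the current UTC
    # timestamp before the items are posted.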
    placeholders = {'NOW()': date_to_str(utcnow())}
    users = {default_user['username']: default_user['password']}
    default_username = default_user['username']
    file = os.path.join(directory, file_name)
    with open(file, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)
        for item in json_data:
            resource = item.get('resource', None)
            try:
                service = get_resource_service(resource)
            except KeyError:
                continue  # resource which is not configured - ignore
            username = item.get('username', None) or default_username
            set_logged_user(username, users[username])
            id_name = item.get('id_name', None)
            id_update = item.get('id_update', None)
            text = json.dumps(item.get('data', None))
            text = apply_placeholders(placeholders, text)
            data = json.loads(text)
            if resource:
                app.data.mongo._mongotize(data, resource)
            if resource == 'users':
                users.update({data['username']: data['password']})
            if id_update:
                id_update = apply_placeholders(placeholders, id_update)
                res = service.patch(ObjectId(id_update), data)
                if not res:
                    raise Exception('failed to update item with id: {}'.format(id_update))
            else:
                try:
                    ids = service.post([data])
                except werkzeug.exceptions.Conflict:
                    ids = [data['_id']]  # data with given id is there already
                except superdesk.errors.SuperdeskApiError:
                    continue  # an error raised by validation - can't guess why, so ignore
                if not ids:
                    raise Exception('failed to insert item: {}'.format(text))
                if id_name:
                    placeholders[id_name] = str(ids[0])

            if app.config['VERSION'] in data:
                number_of_versions_to_insert = data[app.config['VERSION']]
                doc_versions = []

                if data[ITEM_STATE] not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED, CONTENT_STATE.KILLED]:
                    while number_of_versions_to_insert != 0:
                        doc_versions.append(data.copy())
                        number_of_versions_to_insert -= 1
                else:
                    if data[ITEM_STATE] in [CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED, CONTENT_STATE.CORRECTED]:
                        latest_version = data.copy()
                        doc_versions.append(latest_version)

                        published_version = data.copy()
                        published_version[ITEM_STATE] = CONTENT_STATE.PUBLISHED
                        published_version[ITEM_OPERATION] = 'publish'
                        published_version[app.config['VERSION']] = number_of_versions_to_insert - 1
                        doc_versions.append(published_version)

                        number_of_versions_to_insert -= 2
                    elif data[ITEM_STATE] == CONTENT_STATE.PUBLISHED:
                        published_version = data.copy()
                        doc_versions.append(published_version)
                        number_of_versions_to_insert -= 1

                    while number_of_versions_to_insert != 0:
                        doc = data.copy()
                        doc[ITEM_STATE] = CONTENT_STATE.PROGRESS
                        doc.pop(ITEM_OPERATION, '')
                        doc[app.config['VERSION']] = number_of_versions_to_insert
                        doc_versions.append(doc)

                        number_of_versions_to_insert -= 1

                insert_versioning_documents(resource, doc_versions if doc_versions else data)
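# A minimal usage sketch, assuming an application context and a hypothetical
# fixture file named 'app_prepopulate_data.json' next to this module:
#
#   with app.app_context():
#       prepopulate_data('app_prepopulate_data.json')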
Example #57
 def _init_article_versions(self):
     resource_def = self.app.config['DOMAIN']['archive_versions']
     version_id = versioned_id_field(resource_def)
     return [{
         'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
         version_id: '1',
         ITEM_TYPE: CONTENT_TYPE.TEXT,
         config.VERSION: 1,
         'urgency': 4,
         'pubstatus': 'usable',
         'firstcreated': utcnow(),
         'byline': 'By Alan Karben',
         'dateline': {
             'located': {
                 'city': 'Sydney'
             }
         },
         'keywords': ['Student', 'Crime', 'Police', 'Missing'],
         'subject': [{
             'qcode': '17004000',
             'name': 'Statistics'
         }, {
             'qcode': '04001002',
             'name': 'Weather'
         }],
         ITEM_STATE: CONTENT_STATE.DRAFT,
         'expiry': utcnow() + timedelta(minutes=20),
         'unique_name': '#8'
     }, {
         'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
         version_id: '1',
         ITEM_TYPE: CONTENT_TYPE.TEXT,
         config.VERSION: 2,
         'urgency': 4,
         'headline': 'Two students missing',
         'pubstatus': 'usable',
         'firstcreated': utcnow(),
         'byline': 'By Alan Karben',
         'dateline': {
             'located': {
                 'city': 'Sydney'
             }
         },
         'keywords': ['Student', 'Crime', 'Police', 'Missing'],
         'subject': [{
             'qcode': '17004000',
             'name': 'Statistics'
         }, {
             'qcode': '04001002',
             'name': 'Weather'
         }],
         ITEM_STATE: CONTENT_STATE.SUBMITTED,
         'expiry': utcnow() + timedelta(minutes=20),
         'unique_name': '#8'
     }, {
         'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
         version_id: '1',
         ITEM_TYPE: CONTENT_TYPE.TEXT,
         config.VERSION: 3,
         'urgency': 4,
         'headline': 'Two students missing',
         'pubstatus': 'usable',
         'firstcreated': utcnow(),
         'byline': 'By Alan Karben',
         'ednote': 'Andrew Marwood contributed to this article',
         'dateline': {
             'located': {
                 'city': 'Sydney'
             }
         },
         'keywords': ['Student', 'Crime', 'Police', 'Missing'],
         'subject': [{
             'qcode': '17004000',
             'name': 'Statistics'
         }, {
             'qcode': '04001002',
             'name': 'Weather'
         }],
         ITEM_STATE: CONTENT_STATE.PROGRESS,
         'expiry': utcnow() + timedelta(minutes=20),
         'unique_name': '#8'
     }, {
         'guid': 'tag:localhost:2015:69b961ab-2816-4b8a-a584-a7b402fed4f9',
         version_id: '1',
         ITEM_TYPE: CONTENT_TYPE.TEXT,
         config.VERSION: 4,
         'body_html': 'Test body',
         'urgency': 4,
         'headline': 'Two students missing',
         'pubstatus': 'usable',
         'firstcreated': utcnow(),
         'byline': 'By Alan Karben',
         'ednote': 'Andrew Marwood contributed to this article',
         'dateline': {
             'located': {
                 'city': 'Sydney'
             }
         },
         'keywords': ['Student', 'Crime', 'Police', 'Missing'],
         'subject': [{
             'qcode': '17004000',
             'name': 'Statistics'
         }, {
             'qcode': '04001002',
             'name': 'Weather'
         }],
         ITEM_STATE: CONTENT_STATE.PROGRESS,
         'expiry': utcnow() + timedelta(minutes=20),
         'unique_name': '#8'
     }]
Example #58
 def format_filename(self, item):
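     # Builds a timestamped name such as '20180101120000-monitoring-export.pdf'
     # and sanitises it with secure_filename.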
     attachment_filename = '%s-monitoring-export.pdf' % utcnow().strftime(
         '%Y%m%d%H%M%S')
     return secure_filename(attachment_filename)
 def test_validate_schedule(self):
     validate_schedule(utcnow() + timedelta(hours=2))
Example #60
class LegalArchiveConsistencyCheckCommand(superdesk.Command):

    option_list = [
        superdesk.Option('--input_date',
                         '-i',
                         dest='input_date',
                         default=utcnow()),
        superdesk.Option('--days_to_process',
                         '-d',
                         dest='days_to_process',
                         default=1),
        superdesk.Option('--page_size', '-p', dest='page_size', default=500),
    ]
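    # Note: option defaults (including utcnow()) are evaluated once, when the
    # class body executes, not on each run.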

    default_page_size = 500
    archive_ids = []
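    # Mutable class-level default; check_legal_archive_consistency() fills it
    # with the archive ids found for the current run.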

    def run(self, input_date, days_to_process, page_size):

        lock_name = 'legal_archive:consistency'
        self.default_page_size = int(page_size)
        days_to_process = int(days_to_process)
        if not lock(lock_name, expire=610):
            logger.warning("Task: {} is already running.".format(lock_name))
            return

        try:
            logger.info('Input Date: {}  ---- Days to Process: {}'.format(
                input_date, days_to_process))
            self.check_legal_archive_consistency(input_date, days_to_process)
            self.check_legal_archive_version_consistency()
            self.check_legal_archive_queue_consistency()
            logger.info('Completed the legal archive consistency check.')
        except Exception:
            logger.exception(
                "Failed to execute LegalArchiveConsistencyCheckCommand")
        finally:
            unlock(lock_name)

    def check_legal_archive_consistency(self, input_date, days_to_process):
        start_time = utcnow()
        start_date, end_date = self._get_date_range(input_date,
                                                    days_to_process)
        logger.info('start_date: {}  ---- end_date: {}'.format(
            start_date, end_date))
        archive_items = self._get_archive_items(start_date, end_date)
        if archive_items:
            self.archive_ids = list(archive_items.keys())

        logger.info("Found {} items in archive.".format(len(archive_items)))
        legal_archive_items = self._get_legal_archive_items(self.archive_ids)
        logger.info("Found {} items in legal archive.".format(
            len(legal_archive_items)))
        record = self.check_consistency('archive', archive_items,
                                        legal_archive_items)
        record['completed_at'] = utcnow()
        record['started_at'] = start_time
        get_resource_service('legal_archive_consistency').post([record])

    def check_legal_archive_version_consistency(self):
        start_time = utcnow()
        if not self.archive_ids:
            return

        archive_items = self._get_archive_version_items(self.archive_ids)
        logger.info("Found {} items in archive versions.".format(
            len(archive_items)))
        legal_archive_items = self._get_legal_archive_version_items(
            self.archive_ids)
        logger.info("Found {} items in legal archive versions.".format(
            len(legal_archive_items)))
        record = self.check_consistency('archive_versions', archive_items,
                                        legal_archive_items)
        record['completed_at'] = utcnow()
        record['started_at'] = start_time
        get_resource_service('legal_archive_consistency').post([record])

    def check_legal_archive_queue_consistency(self):
        start_time = utcnow()
        if not self.archive_ids:
            return

        archive_items = self._get_publish_queue_items(self.archive_ids)
        logger.info("Found {} items in publish queue.".format(
            len(archive_items)))
        legal_archive_items = self._get_legal_publish_queue_items(
            self.archive_ids)
        logger.info("Found {} items in legal publish queue.".format(
            len(archive_items)))
        record = self.check_consistency('publish_queue', archive_items,
                                        legal_archive_items)
        record['completed_at'] = utcnow()
        record['started_at'] = start_time
        get_resource_service('legal_archive_consistency').post([record])

    def check_consistency(self, resource, archive_items, legal_items):
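        # Compares the two snapshots: counts, ids present only in archive, and
        # per-item field differences; '.' is replaced in keys because MongoDB
        # field names may not contain dots.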
        record = {
            'resource': resource,
            'archive': len(archive_items),
            'legal': len(legal_items)
        }
        archive_ids = set(archive_items.keys())
        legal_ids = set(legal_items.keys())
        record['archive_only'] = list(archive_ids - legal_ids)
        diff = {}
        for k, v in archive_items.items():
            if not compare_dictionaries(v, legal_items.get(k)):
                diff[k.replace('.', ':')] = {
                    'archive': v,
                    'legal': legal_items.get(k)
                }

        record['difference'] = diff
        record['identical'] = len(archive_items) - len(diff)
        return record

    def __get_key(self, item):
        return item.get(config.ID_FIELD)

    def __get_version_key(self, item):
        return '{}-{}'.format(item.get('_id_document'),
                              item.get(config.VERSION))

    def _get_items(self, resource, query, sort, keys, callback):
        req = ParsedRequest()
        cursor = get_resource_service(resource).get_from_mongo(req=req,
                                                               lookup=query)
        count = cursor.count()
        no_of_buckets = len(range(0, count, self.default_page_size))
        items = {}
        req.sort = sort
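        # Page through the results with skip/limit so large result sets are not
        # loaded at once; each page is reduced to the keys of interest, indexed
        # by the key produced by `callback`.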

        for bucket in range(0, no_of_buckets):
            skip = bucket * self.default_page_size
            logger.info('Page: {}, skip: {}'.format(bucket + 1, skip))
            cursor = get_resource_service(resource).get_from_mongo(
                req=req, lookup=query)
            cursor.skip(skip)
            cursor.limit(self.default_page_size)
            cursor = list(cursor)
            items.update({
                callback(item):
                {key: item.get(key)
                 for key in keys if key in item}
                for item in cursor
            })
        return items

    def _get_archive_items(self, start_date, end_date):
        """
        Gets the archive items from the mongo database that were updated today
        :return:
        """
        query = {
            '$and': [{
                '_updated': {
                    '$gte': date_to_str(start_date),
                    '$lte': date_to_str(end_date)
                }
            }, {
                ITEM_STATE: {
                    '$in': [
                        CONTENT_STATE.CORRECTED, CONTENT_STATE.PUBLISHED,
                        CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED
                    ]
                }
            }]
        }

        return self._get_items(ARCHIVE, query, '_created',
                               [config.VERSION, 'versioncreated', 'state'],
                               self.__get_key)

    def _get_legal_archive_items(self, archive_ids):
        """
        Get the legal archive items
        :param list archive_ids:
        :return dict:
        """
        if not archive_ids:
            return {}

        query = {'$and': [{'_id': {'$in': archive_ids}}]}

        return self._get_items('legal_archive', query, '_created',
                               [config.VERSION, 'versioncreated', 'state'],
                               self.__get_key)

    def _get_archive_version_items(self, archive_ids):
        """
        Get the archive version items
        :param list archive_ids:
        :return dict:
        """
        if not archive_ids:
            return {}

        query = {'$and': [{'_id_document': {'$in': archive_ids}}]}

        return self._get_items('archive_versions', query, '_created',
                               [config.VERSION, 'versioncreated', 'state'],
                               self.__get_version_key)

    def _get_legal_archive_version_items(self, archive_ids):
        """
        Get the legal archive version items
        :param list archive_ids:
        :return dict:
        """
        if not archive_ids:
            return {}

        query = {'$and': [{'_id_document': {'$in': archive_ids}}]}

        return self._get_items('legal_archive_versions', query, '_created',
                               [config.VERSION, 'versioncreated', 'state'],
                               self.__get_version_key)

    def _get_publish_queue_items(self, archive_ids):
        """
        Get the publish queue items
        :param list archive_ids:
        :return dict:
        """
        if not archive_ids:
            return {}

        query = {'$and': [{'item_id': {'$in': archive_ids}}]}

        return self._get_items('publish_queue', query, '_created', [
            'published_seq_num', 'publishing_action', 'unique_name',
            'item_version', 'state', 'content_type'
        ], self.__get_key)

    def _get_legal_publish_queue_items(self, archive_ids):
        """
        Get the legal publish queue items
        :param list archive_ids:
        :return dict:
        """
        if not archive_ids:
            return {}

        query = {'$and': [{'item_id': {'$in': archive_ids}}]}

        return self._get_items('legal_publish_queue', query, '_created', [
            'published_seq_num', 'publishing_action', 'unique_name',
            'item_version', 'state', 'content_type'
        ], self.__get_key)

    def _get_date_range(self, input_date, days_to_process=1):
        """
        Calculate the date range to process
        :param datetime input_date:
        :param int days_to_process:
        :return:
        """
        if not input_date:
            input_date = utcnow()
        elif isinstance(input_date, str):
            input_date = get_date(input_date)
        elif not isinstance(input_date, datetime):
            raise ValueError("Invalid Input Date.")

        end_date = input_date
        start_date = (end_date - timedelta(days=int(days_to_process))).replace(
            hour=0, minute=0, second=0, microsecond=0)

        return start_date, end_date