def test_expiring_with_content(self):
        """Post the fetched 'reuters' items to ingest and check that expiry leaves one.

        Items at index 0-4 get a timestamp 11 hours in the past; any item
        past index 4 gets one 11 minutes in the future. The same timestamp
        is written to both ``expiry`` and ``versioncreated``.
        """
        provider = get_resource_service('ingest_providers').find_one(
            name='reuters', req=None)
        provider_service = self._get_provider_service(provider)
        provider_service.provider = provider
        provider_service.URL = provider.get('config', {}).get('url')
        items = provider_service.fetch_ingest('tag_reuters.com_2014_newsml_KBN0FL0NM:10')

        now = utcnow()
        stale = now - timedelta(hours=11)
        fresh = now + timedelta(minutes=11)
        for position, item in enumerate(items):
            item['ingest_provider'] = provider['_id']
            stamp = fresh if position > 4 else stale
            item['expiry'] = stamp
            item['versioncreated'] = stamp

        ingest_service = get_resource_service('ingest')
        ingest_service.post(items)

        # six items are stored before the cleanup runs
        stored = ingest_service.get(req=None, lookup={})
        self.assertEqual(6, stored.count())

        RemoveExpiredContent().run(provider.get('type'))

        # a single item is expected to remain afterwards
        remaining = ingest_service.get(req=None, lookup={})
        self.assertEqual(1, remaining.count())
    def test_expiring_content_with_files(self):
        """Ingest 'reuters' items and check the 'upload' file store is emptied.

        Four files are expected in the store after ingesting, and none
        after ``RemoveExpiredContent`` has run for the provider type.
        """
        provider = get_resource_service('ingest_providers').find_one(
            name='reuters', req=None)
        provider_service = self._get_provider_service(provider)
        provider_service.provider = provider
        provider_service.URL = provider.get('config', {}).get('url')
        items = provider_service.fetch_ingest('tag_reuters.com_2014_newsml_KBN0FL0NM:10')
        for fetched in items:
            fetched['ingest_provider'] = provider['_id']

        now = utcnow()
        # first two items: expiry 11 hours in the past
        for idx in (0, 1):
            items[idx]['expiry'] = now - timedelta(hours=11)
        # third item: expiry 11 hours in the future
        items[2]['expiry'] = now + timedelta(hours=11)
        # sixth item only gets a future versioncreated
        items[5]['versioncreated'] = now + timedelta(minutes=11)

        self.ingest_items(items, provider, provider_service)

        # four files are stored after ingesting
        files_before = self.app.media.fs('upload').find()
        self.assertEqual(4, files_before.count())

        RemoveExpiredContent().run(provider.get('type'))

        # no files are left once the cleanup has run
        files_after = self.app.media.fs('upload').find()
        self.assertEqual(0, files_after.count())
    def test_expiring_with_content(self):
        """Post fetched items to ingest, expire them, and verify one is left.

        Items at index 0-4 get a timestamp 11 hours in the past; any item
        past index 4 gets one 11 minutes in the future. After the cleanup
        the count is checked through the ingest service and directly on
        both the elastic and mongo backends.
        """
        provider, provider_service = self.setup_reuters_provider()
        items = provider_service.fetch_ingest(reuters_guid)

        now = utcnow()
        expired_at = now - timedelta(hours=11)
        expires_later = now + timedelta(minutes=11)
        for position, item in enumerate(items):
            item['ingest_provider'] = provider['_id']
            stamp = expires_later if position > 4 else expired_at
            item['expiry'] = stamp
            item['versioncreated'] = stamp

        ingest_service = get_resource_service('ingest')
        ingest_service.post(items)

        # six items are stored before the cleanup runs
        stored = ingest_service.get(req=None, lookup={})
        self.assertEqual(6, stored.count())

        RemoveExpiredContent().run(provider.get('type'))

        # a single item is expected to remain afterwards
        remaining = ingest_service.get(req=None, lookup={})
        self.assertEqual(1, remaining.count())

        # the same request object is used for both backend lookups
        req = ParsedRequest()
        elastic_hits = self.app.data.elastic.find('ingest', req, {})
        self.assertEqual(1, elastic_hits.count())
        mongo_hits = self.app.data.mongo.find('ingest', req, {})
        self.assertEqual(1, mongo_hits.count())
    def test_expiring_content_with_files(self):
        """Ingest fetched items and check the 'upload' file store is emptied.

        Four files are expected in the store after ingesting, and none
        after ``RemoveExpiredContent`` has run for the provider type.
        """
        provider, provider_service = self.setup_reuters_provider()
        items = provider_service.fetch_ingest(reuters_guid)
        for fetched in items:
            fetched['ingest_provider'] = provider['_id']

        now = utcnow()
        # first two items: expiry 11 hours in the past
        for idx in (0, 1):
            items[idx]['expiry'] = now - timedelta(hours=11)
        # third item: expiry 11 hours in the future
        items[2]['expiry'] = now + timedelta(hours=11)
        # sixth item only gets a future versioncreated
        items[5]['versioncreated'] = now + timedelta(minutes=11)

        self.ingest_items(items, provider, provider_service)

        # four files are stored after ingesting
        files_before = self.app.media.fs('upload').find()
        self.assertEqual(4, files_before.count())

        RemoveExpiredContent().run(provider.get('type'))

        # no files are left once the cleanup has run
        files_after = self.app.media.fs('upload').find()
        self.assertEqual(0, files_after.count())
    def test_expiring_with_content(self):
        """Post the fetched 'reuters' items to ingest and check that expiry leaves one.

        Items at index 0-4 get a timestamp 11 hours in the past; any item
        past index 4 gets one 11 minutes in the future. The same timestamp
        is written to both ``expiry`` and ``versioncreated``.
        """
        providers = get_resource_service('ingest_providers')
        provider = providers.find_one(name='reuters', req=None)
        provider_service = self._get_provider_service(provider)
        provider_service.provider = provider
        provider_service.URL = provider.get('config', {}).get('url')
        items = provider_service.fetch_ingest('tag_reuters.com_2014_newsml_KBN0FL0NM:10')

        now = utcnow()
        in_the_past = now - timedelta(hours=11)
        in_the_future = now + timedelta(minutes=11)
        for n, entry in enumerate(items):
            entry['ingest_provider'] = provider['_id']
            when = in_the_future if n > 4 else in_the_past
            entry['expiry'] = when
            entry['versioncreated'] = when

        ingest = get_resource_service('ingest')
        ingest.post(items)

        # six items are stored before the cleanup runs
        count_before = ingest.get(req=None, lookup={}).count()
        self.assertEqual(6, count_before)

        RemoveExpiredContent().run(provider.get('type'))

        # a single item is expected to remain afterwards
        count_after = ingest.get(req=None, lookup={}).count()
        self.assertEqual(1, count_after)
    def test_expiring_content_with_files(self):
        """Ingest fetched items and verify cleanup empties the 'upload' store.

        The store is expected to hold four files after ingesting and zero
        after ``RemoveExpiredContent`` has run for the provider type.
        """
        provider, provider_service = self.setup_reuters_provider()
        items = provider_service.fetch_ingest(reuters_guid)
        for entry in items:
            entry['ingest_provider'] = provider['_id']

        now = utcnow()
        # items 0 and 1 expired 11 hours ago, item 2 expires in 11 hours
        items[0]['expiry'] = now - timedelta(hours=11)
        items[1]['expiry'] = now - timedelta(hours=11)
        items[2]['expiry'] = now + timedelta(hours=11)
        # item 5 only has its versioncreated moved 11 minutes ahead
        items[5]['versioncreated'] = now + timedelta(minutes=11)

        self.ingest_items(items, provider, provider_service)

        # four files are stored after ingesting
        stored_count = self.app.media.fs('upload').find().count()
        self.assertEqual(4, stored_count)

        RemoveExpiredContent().run(provider.get('type'))

        # no files are left once the cleanup has run
        leftover_count = self.app.media.fs('upload').find().count()
        self.assertEqual(0, leftover_count)
    def test_expiring_with_content(self):
        """Post fetched items to ingest, expire them, and verify one is left.

        Items at index 0-4 get a timestamp 11 hours in the past; any item
        past index 4 gets one 11 minutes in the future. The remaining
        count is asserted via the ingest service and then on the elastic
        and mongo backends.
        """
        provider, provider_service = self.setup_reuters_provider()
        items = provider_service.fetch_ingest(reuters_guid)

        now = utcnow()
        in_the_past = now - timedelta(hours=11)
        in_the_future = now + timedelta(minutes=11)
        for n, entry in enumerate(items):
            entry['ingest_provider'] = provider['_id']
            when = in_the_future if n > 4 else in_the_past
            entry['expiry'] = when
            entry['versioncreated'] = when

        ingest = get_resource_service('ingest')
        ingest.post(items)

        # six items are stored before the cleanup runs
        count_before = ingest.get(req=None, lookup={}).count()
        self.assertEqual(6, count_before)

        RemoveExpiredContent().run(provider.get('type'))

        # a single item is expected to remain afterwards
        count_after = ingest.get(req=None, lookup={}).count()
        self.assertEqual(1, count_after)

        # the same request object is used for both backend lookups
        req = ParsedRequest()
        in_elastic = self.app.data.elastic.find('ingest', req, {}).count()
        self.assertEqual(1, in_elastic)
        in_mongo = self.app.data.mongo.find('ingest', req, {}).count()
        self.assertEqual(1, in_mongo)
    def test_expiring_content_with_files(self):
        """Ingest 'reuters' items and verify cleanup empties the 'upload' store.

        The store is expected to hold four files after ingesting and zero
        after ``RemoveExpiredContent`` has run for the provider type.
        """
        providers = get_resource_service('ingest_providers')
        provider = providers.find_one(name='reuters', req=None)
        provider_service = self._get_provider_service(provider)
        provider_service.provider = provider
        provider_service.URL = provider.get('config', {}).get('url')
        items = provider_service.fetch_ingest('tag_reuters.com_2014_newsml_KBN0FL0NM:10')
        for entry in items:
            entry['ingest_provider'] = provider['_id']

        now = utcnow()
        # items 0 and 1 expired 11 hours ago, item 2 expires in 11 hours
        items[0]['expiry'] = now - timedelta(hours=11)
        items[1]['expiry'] = now - timedelta(hours=11)
        items[2]['expiry'] = now + timedelta(hours=11)
        # item 5 only has its versioncreated moved 11 minutes ahead
        items[5]['versioncreated'] = now + timedelta(minutes=11)

        self.ingest_items(items, provider, provider_service)

        # four files are stored after ingesting
        stored_count = self.app.media.fs('upload').find().count()
        self.assertEqual(4, stored_count)

        RemoveExpiredContent().run(provider.get('type'))

        # no files are left once the cleanup has run
        leftover_count = self.app.media.fs('upload').find().count()
        self.assertEqual(0, leftover_count)
    def test_removing_expired_items_from_elastic_only(self):
        """Insert two docs straight into elastic and check one survives cleanup.

        'foo' has an expiry 30 minutes in the past and 'bar' one 30 minutes
        in the future; a single document is expected in the elastic
        'ingest' resource after ``RemoveExpiredContent`` runs.
        """
        now = utcnow()
        half_hour = timedelta(minutes=30)
        docs = [
            {'_id': 'foo', 'expiry': now - half_hour},
            {'_id': 'bar', 'expiry': now + half_hour},
        ]
        self.app.data.elastic.insert('ingest', docs)

        RemoveExpiredContent().run()

        remaining = self.app.data.elastic.find('ingest', ParsedRequest(), {})
        self.assertEqual(1, remaining.count())
    def test_removing_expired_items_from_elastic_only(self):
        """Insert two docs straight into elastic and check one survives cleanup.

        "foo" has an expiry 30 minutes in the past and "bar" one 30 minutes
        in the future; a single document is expected in the elastic
        "ingest" resource after ``RemoveExpiredContent`` runs.
        """
        now = utcnow()
        expired_doc = {"_id": "foo", "expiry": now - timedelta(minutes=30)}
        live_doc = {"_id": "bar", "expiry": now + timedelta(minutes=30)}
        self.app.data.elastic.insert("ingest", [expired_doc, live_doc])

        RemoveExpiredContent().run()

        left_in_elastic = self.app.data.elastic.find("ingest", ParsedRequest(), {}).count()
        self.assertEqual(1, left_in_elastic)
Example #11
0
def gc_ingest():
    """Instantiate ``RemoveExpiredContent`` and run it with no arguments."""
    remover = RemoveExpiredContent()
    remover.run()