from superdesk.errors import SuperdeskApiError, ProviderError
from superdesk.io import register_feeding_service, registered_feeding_services
from .tests import setup_providers, teardown_providers
from superdesk.io.feeding_services import FeedingService
from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent
from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running
from test_factory import SuperdeskTestCase


class TestProviderService(FeedingService):
    """Stub feeding service whose ``_update`` never produces any items."""

    def _update(self, provider):
        """Ignore ``provider`` and hand back a fresh empty list."""
        no_items = []
        return no_items


# Register a TestProviderService instance under the name 'test'. The third
# argument is a one-element list holding the error description obtained from
# ProviderError.anpaError(None, None).
register_feeding_service(
    'test',
    TestProviderService(),
    [ProviderError.anpaError(None, None).get_error_description()],
)


class CeleryTaskRaceTest(SuperdeskTestCase):
    """Checks the running-task markers from ``superdesk.celery_task_utils``."""

    def test_the_second_update_fails_if_already_running(self):
        provider = dict(_id='abc', name='test provider', update_schedule=dict(minutes=1))
        name, provider_id = provider['name'], provider['_id']

        # Clearing the marker up front is expected to report a falsy result.
        self.assertFalse(mark_task_as_not_running(name, provider_id))

        # The first check is expected to come back falsy ...
        first_check = is_task_running(name, provider_id, {'minutes': 1})
        self.assertFalse(first_check, 'Failed to mark ingest update as running')

        # ... while an immediate second check is expected to come back truthy.
        second_check = is_task_running(name, provider_id, {'minutes': 1})
        self.assertTrue(second_check, 'Ingest update marked as running, possible race condition')
Example #2
0
        try:
            if not os.path.exists(os.path.join(file_path, "_PROCESSED/")):
                os.makedirs(os.path.join(file_path, "_PROCESSED/"))
            if not os.path.exists(os.path.join(file_path, "_ERROR/")):
                os.makedirs(os.path.join(file_path, "_ERROR/"))
        except Exception as ex:
            raise IngestFileError.folderCreateError(ex, provider)

        try:
            if success:
                shutil.copy2(os.path.join(file_path, filename), os.path.join(file_path, "_PROCESSED/"))
            else:
                shutil.copy2(os.path.join(file_path, filename), os.path.join(file_path, "_ERROR/"))
        except Exception as ex:
            raise IngestFileError.fileMoveError(ex, provider)
        finally:
            os.remove(os.path.join(file_path, filename))

    def is_latest_content(self, last_updated, provider_last_updated=None):
        """
        Tell whether content is recent enough to be parsed.

        :param last_updated: timestamp of the content being considered
        :param provider_last_updated: timestamp of the provider's last update;
            when falsy, seven days before ``utcnow()`` is used instead
        :return: True when ``last_updated`` is strictly later than the
            reference timestamp minus ten minutes
        """
        reference = provider_last_updated
        if not reference:
            reference = utcnow() - timedelta(days=7)

        # Allow ten minutes of slack before the reference timestamp.
        cutoff = reference - timedelta(minutes=10)
        return cutoff < last_updated


# Register a FileFeedingService instance under FileFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    FileFeedingService.NAME,
    FileFeedingService(),
    FileFeedingService.ERRORS,
)
Example #3
0
                                os.remove(local_file_path)
                                logger.exception(
                                    'Exception retrieving from FTP server')
                                continue
                    except FileExistsError:
                        continue

                    registered_parser = self.get_feed_parser(provider)
                    if isinstance(registered_parser, XMLFeedParser):
                        xml = etree.parse(local_file_path).getroot()
                        parser = self.get_feed_parser(provider, xml)
                        parsed = parser.parse(xml, provider)
                    else:
                        parser = self.get_feed_parser(provider,
                                                      local_file_path)
                        parsed = parser.parse(local_file_path, provider)

                    if isinstance(parsed, dict):
                        parsed = [parsed]

                    items.append(parsed)
            return items
        except IngestFtpError:
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider)


# Register an FTPFeedingService instance under FTPFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    FTPFeedingService.NAME,
    FTPFeedingService(),
    FTPFeedingService.ERRORS,
)
Example #4
0
        return result_items

    def _parse_items(self, guid):
        """
        Retrieve the 'item' tree for ``guid`` and parse it.

        The tree comes from ``self._get_tree('item', {'id': guid})``; the
        parser is chosen by ``self.get_feed_parser`` for ``self.provider`` and
        that tree.

        :param guid: identifier sent as the ``id`` entry of the payload
        :return: whatever the selected parser's ``parse`` returns
        """
        tree = self._get_tree('item', {'id': guid})
        feed_parser = self.get_feed_parser(self.provider, tree)
        return feed_parser.parse(tree, self.provider)

    def _fetch_items_in_package(self, item):
        """
        Collect the parsed items referenced by a package.

        Walks every ref of every group in ``item`` and, for each ref that has
        a ``residRef`` key, appends the result of ``self._parse_items`` for
        that reference.

        :param item: mapping that may contain a ``groups`` list
        :return: flat list of the parsed items, in reference order
        """
        fetched = []
        for group in item.get('groups', []):
            for ref in group.get('refs', []):
                # Refs without a 'residRef' key are passed over.
                if 'residRef' not in ref:
                    continue
                fetched.extend(self._parse_items(ref.get('residRef')))

        return fetched


# Register a ReutersHTTPFeedingService instance under
# ReutersHTTPFeedingService.NAME, passing along the class's ERRORS attribute.
register_feeding_service(
    ReutersHTTPFeedingService.NAME,
    ReutersHTTPFeedingService(),
    ReutersHTTPFeedingService.ERRORS,
)
Example #5
0
        """
        package = {
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'guid': generate_guid(type=GUID_TAG),
            'firstcreated': text_item['firstcreated'],
            'versioncreated': text_item['versioncreated'],
            'headline': text_item.get('headline', ''),
            'groups': [
                {
                    'id': 'root',
                    'role': 'grpRole:NEP',
                    'refs': [{'idRef': 'main'}],
                }, {
                    'id': 'main',
                    'role': 'main',
                    'refs': [],
                }
            ]
        }

        item_references = package['groups'][1]['refs']
        item_references.append({'residRef': text_item['guid']})

        for image in image_items:
            item_references.append({'residRef': image['guid']})

        return package


# Register an RSSFeedingService instance under RSSFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    RSSFeedingService.NAME,
    RSSFeedingService(),
    RSSFeedingService.ERRORS,
)
Example #6
0
        """
        package = {
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'guid': generate_guid(type=GUID_TAG),
            'firstcreated': text_item['firstcreated'],
            'versioncreated': text_item['versioncreated'],
            'headline': text_item.get('headline', ''),
            'groups': [
                {
                    'id': 'root',
                    'role': 'grpRole:NEP',
                    'refs': [{'idRef': 'main'}],
                }, {
                    'id': 'main',
                    'role': 'main',
                    'refs': [],
                }
            ]
        }

        item_references = package['groups'][1]['refs']
        item_references.append({'residRef': text_item['guid']})

        for image in image_items:
            item_references.append({'residRef': image['guid']})

        return package


# Register an RSSFeedingService instance under RSSFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    RSSFeedingService.NAME,
    RSSFeedingService(),
    RSSFeedingService.ERRORS,
)
Example #7
0
            except imaplib.IMAP4.error:
                raise IngestEmailError.emailLoginError(imaplib.IMAP4.error, provider)

            rv, data = imap.select(config.get('mailbox', None), readonly=False)
            if rv == 'OK':
                rv, data = imap.search(None, config.get('filter', '(UNSEEN)'))
                if rv == 'OK':
                    new_items = []
                    for num in data[0].split():
                        rv, data = imap.fetch(num, '(RFC822)')
                        if rv == 'OK':
                            try:
                                parser = self.get_feed_parser(provider, data)
                                new_items.append(parser.parse(data, provider))
                                rv, data = imap.store(num, '+FLAGS', '\\Seen')
                            except IngestEmailError:
                                continue
                imap.close()
            imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href, mimetype=None):
        """
        Pass ``href`` and ``mimetype`` to ``url_for_media`` and return its result.

        :param href: reference forwarded unchanged as the first argument
        :param mimetype: optional value forwarded as the second argument
        """
        media_url = url_for_media(href, mimetype)
        return media_url


# Register an EmailFeedingService instance under EmailFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    EmailFeedingService.NAME,
    EmailFeedingService(),
    EmailFeedingService.ERRORS,
)
Example #8
0
        'tokens': {
            'auth_token': token,
            'created': utcnow() - timedelta(hours=hours),
        }
    }


class TestFeedingService(HTTPFeedingService):
    """Bare HTTP feeding service named ``'test'`` that declares no errors."""

    ERRORS = []
    NAME = 'test'

    def _update(self, provider):
        """Do nothing with ``provider``; always returns ``None``."""
        return None


# Register a TestFeedingService instance under TestFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    TestFeedingService.NAME,
    TestFeedingService(),
    TestFeedingService.ERRORS,
)


class GetTokenTestCase(TestCase):
    """Checks ``_get_auth_token`` as exposed through ``TestFeedingService``.

    Uses ``assertEqual`` rather than the ``assertEquals`` alias, which was
    deprecated and then removed from ``unittest`` in Python 3.12.
    """

    def setUp(self):
        super().setUp()

    def test_get_null_token(self):
        # A provider with no data at all is expected to yield an empty string.
        provider = {}
        self.assertEqual('', TestFeedingService()._get_auth_token(provider))

    def test_get_existing_token(self):
        # setup_provider('abc', 10) presumably builds a provider holding token
        # 'abc' created 10 hours ago; that token is expected back unchanged.
        provider = setup_provider('abc', 10)
        self.assertEqual('abc', TestFeedingService()._get_auth_token(provider))
Example #9
0
from superdesk.errors import SuperdeskApiError, ProviderError
from superdesk.io import register_feeding_service, registered_feeding_services
from .tests import setup_providers, teardown_providers
from superdesk.io.feeding_services import FeedingService
from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent
from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running
from test_factory import SuperdeskTestCase


class TestProviderService(FeedingService):
    """Stub feeding service whose ``_update`` never produces any items."""

    def _update(self, provider):
        """Ignore ``provider`` and hand back a fresh empty list."""
        no_items = []
        return no_items


# Register a TestProviderService instance under the name 'test'. The third
# argument is a one-element list holding the error description obtained from
# ProviderError.anpaError(None, None).
register_feeding_service(
    'test',
    TestProviderService(),
    [ProviderError.anpaError(None, None).get_error_description()],
)


class CeleryTaskRaceTest(SuperdeskTestCase):
    """Checks the running-task marker removal from ``superdesk.celery_task_utils``."""

    def test_the_second_update_fails_if_already_running(self):
        provider = dict(_id='abc', name='test provider', update_schedule=dict(minutes=1))
        name, provider_id = provider['name'], provider['_id']

        # Clearing the marker is expected to report a falsy result here.
        self.assertFalse(mark_task_as_not_running(name, provider_id))
Example #10
0
        'token': {
            'token': token,
            'created': utcnow() - timedelta(hours=hours),
        }
    }


class TestFeedingService(HTTPFeedingService):
    """Bare HTTP feeding service named ``'test'`` that declares no errors."""

    ERRORS = []
    NAME = 'test'

    def _update(self, provider):
        """Do nothing with ``provider``; always returns ``None``."""
        return None


# Register a TestFeedingService instance under TestFeedingService.NAME,
# passing along the class's ERRORS attribute.
register_feeding_service(
    TestFeedingService.NAME,
    TestFeedingService(),
    TestFeedingService.ERRORS,
)


class GetTokenTestCase(TestCase):
    """Checks ``_get_auth_token`` as exposed through ``TestFeedingService``.

    Uses ``assertEqual`` rather than the ``assertEquals`` alias, which was
    deprecated and then removed from ``unittest`` in Python 3.12.
    """

    def setUp(self):
        super().setUp()

    def test_get_null_token(self):
        # A provider with no data at all is expected to yield an empty string.
        provider = {}
        self.assertEqual('', TestFeedingService()._get_auth_token(provider))

    def test_get_existing_token(self):
        # setup_provider('abc', 10) presumably builds a provider holding token
        # 'abc' created 10 hours ago; that token is expected back unchanged.
        provider = setup_provider('abc', 10)
        self.assertEqual('abc',
                         TestFeedingService()._get_auth_token(provider))