Ejemplo n.º 1
0
    def _with_exception(self, link, ignore_exceptions):
        """Run a planner whose link always raises and check how it reacts.

        Args:
            link: Mocked link; its ``transfer`` is configured to raise
                ``DummyException`` on every call.
            ignore_exceptions: Forwarded to ``SchedulePlanner``. When true the
                planner must still be running after the failure and is shut
                down here; when false it must already have stopped.
        """
        self.planner = SchedulePlanner(ignore_exceptions=ignore_exceptions)
        self.planner.immediate_transfer = False  # otherwise planner will never start
        self.planner._refresh_interval = 0.02

        # Configure the failing link once (this setup used to be repeated).
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(link)

        with self.assertLogs(logging.getLogger('databay.BasePlanner'),
                             level='WARNING') as cm:

            th = Thread(target=self.planner.start, daemon=True)
            th.start()
            # Give the planner time for at least one scheduled transfer.
            time.sleep(0.04)
            link.transfer.assert_called()

            if ignore_exceptions:
                self.assertTrue(self.planner.running,
                                'Planner should be running')
                self.planner.shutdown(wait=False)
                th.join(timeout=2)
                self.assertFalse(th.is_alive(), 'Thread should be stopped.')

            self.assertFalse(self.planner.running, 'Planner should be stopped')
            # assertIn reports both operands on failure, unlike assertTrue(a in b).
            self.assertIn('I\'m a dummy exception', ';'.join(cm.output))
Ejemplo n.º 2
0
    def setUp(self, link):
        """Create a planner and configure the supplied link mock for a test.

        Args:
            link: Mock object passed into this method; it is stored as
                ``self.link`` after being configured.
        """
        self.planner = SchedulePlanner(refresh_interval=0.02)

        # The link starts without a job; every ``set_job`` call then records
        # the given job on the mock so tests can read it back via ``link.job``.
        link.job = None
        link.set_job.side_effect = lambda job: setattr(link, 'job', job)
        link.interval.total_seconds.return_value = 0.02

        self.link = link
Ejemplo n.º 3
0
    def setUp(self):
        """Create a planner and a ``Link``-spec'd mock for a test."""
        self.planner = SchedulePlanner(refresh_interval=0.02)

        mocked_link = MagicMock(spec=Link)

        def remember_job(job):
            # Record the job on the mock so tests can read ``link.job``.
            mocked_link.job = job

        # Dotted keys configure attributes of child mocks.
        mocked_link.configure_mock(**{
            'interval.total_seconds.return_value': 0.02,
            'set_job.side_effect': remember_job,
            'job': None,
            'immediate_transfer': True,
        })
        self.link = mocked_link
Ejemplo n.º 4
0
    def run(self):
        """Assemble a small demo pipeline and start the planner.

        Two links are registered: a file inlet feeding a print outlet with a
        0.5-second timedelta, and an HTTP inlet (listed three times) feeding
        a CSV outlet with a 2-second timedelta.
        """
        # planner = APSPlanner()
        planner = SchedulePlanner(refresh_interval=0.5)

        # --- inlets ---------------------------------------------------------
        todo_inlet = HttpInlet(
            'https://jsonplaceholder.typicode.com/todos/1',
            metadata={CsvOutlet.FILE_MODE: 'a'},
        )
        line_inlet = FileInlet('output_03.csv', read_mode=FileInletMode.LINE)
        # Only referenced by the disabled 'second' link below.
        echo_inlet = HttpInlet(
            'https://postman-echo.com/get?foo1=bar1&foo2=bar2',
            metadata={
                'MONGODB_COLLECTION': 'test_collection2',
                'csv_file': 'output_02.csv',
            },
        )

        # --- outlets --------------------------------------------------------
        console_outlet = PrintOutlet(only_payload=True)
        # Only referenced by the disabled links below.
        mongo_outlet = MongoOutlet('databay', 'test_collection')
        csv_outlet = CsvOutlet('output_03.csv')

        # --- links ----------------------------------------------------------
        print_link = Link([line_inlet], [console_outlet],
                          timedelta(seconds=0.5))
        planner.add_links(print_link)

        csv_link = Link([todo_inlet] * 3, [csv_outlet], timedelta(seconds=2))
        planner.add_links(csv_link)

        # Disabled alternatives, kept for reference:
        # planner.add_links(Link([todo_inlet], [mongo_outlet], timedelta(seconds=1), name='first'))
        # planner.add_links(Link([echo_inlet, echo_inlet, echo_inlet], [mongo_outlet], timedelta(seconds=5), name='second'))
        # planner.add_links(Link([], [], timedelta(seconds=1.5)))
        # planner.add_links(Link([alphavantage_inlet], [mongo_outlet], timedelta(seconds=5)))
        # planner.add_links(Link([iex_inlet], [mongo_outlet], timedelta(seconds=5)))
        planner.start()
Ejemplo n.º 5
0
    def _with_exception(self, link, catch_exceptions):
        """Run a planner whose link always raises and check how it reacts.

        Args:
            link: Mocked link; its ``transfer`` is configured to raise
                ``DummyException`` on every call.
            catch_exceptions: Forwarded to ``SchedulePlanner``. When true the
                planner must still be running after the failure and is shut
                down here; when false it must already have stopped.
        """
        # Silence databay logging for this test only: the previous level is
        # restored on cleanup so the change does not leak into other tests.
        databay_logger = logging.getLogger('databay')
        self.addCleanup(databay_logger.setLevel, databay_logger.level)
        databay_logger.setLevel(logging.CRITICAL)

        self.planner = SchedulePlanner(catch_exceptions=catch_exceptions)
        self.planner._refresh_interval = 0.02

        # Configure the failing link once (this setup used to be repeated).
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        # Give the planner time for at least one scheduled transfer.
        time.sleep(0.04)
        link.transfer.assert_called()

        if catch_exceptions:
            self.assertTrue(self.planner.running,
                            'Scheduler should be running')
            self.planner.shutdown(False)
            th.join(timeout=2)
            self.assertFalse(th.is_alive(), 'Thread should be stopped.')

        self.assertFalse(self.planner.running, 'Scheduler should be stopped')
Ejemplo n.º 6
0
    def test_flush_after_shutdown(self, inlet, outlet):
        """Buffered records survive shutdown and are pushed once flushed.

        The buffer thresholds (100 records / 10 time units) are not reached
        during the short run, so every push is expected to carry an empty
        list until ``buffer.flush`` is set.

        Args:
            inlet: Inlet mock; its ``_pull`` is replaced with a counting
                coroutine.
            outlet: Outlet mock; its ``_push`` calls are inspected.
        """
        buffer = Buffer(count_threshold=100, time_threshold=10)

        counter_dict = {'counter': 0, 'records': []}

        link = Link(inlet,
                    outlet,
                    interval=0.01,
                    processors=buffer,
                    copy_records=False)
        planner = SchedulePlanner(link, refresh_interval=0.01)

        async def pull_coro(_):
            # Produce one numbered record per pull and remember it so the
            # buffer contents can be compared against it later.
            counter_dict['counter'] += 1
            record = Record(payload=counter_dict['counter'])
            counter_dict['records'].append(record)
            return [record]

        mock_pull = MagicMock(side_effect=pull_coro)
        inlet._pull = mock_pull

        th = Thread(target=planner.start, daemon=True)
        th.start()
        time.sleep(0.1)

        planner.shutdown()
        th.join()

        # Each recorded call is an (args, kwargs) pair, so [0][0] is the first
        # positional argument of ``_push``. Calling the call object instead
        # (``c()``) yields an empty ``call()`` that compares equal to []
        # regardless of what was pushed, which made this check vacuous.
        calls = outlet._push.call_args_list
        for c in calls:
            self.assertEqual(c[0][0], [],
                             'Should only contain empty record lists.')
        self.assertEqual(buffer.records, counter_dict['records'],
                         'All records should be stored in the buffer')

        planner.force_transfer()
        self.assertEqual(outlet._push.call_args[0][0], [],
                         'Should return empty record list')

        buffer.flush = True
        planner.force_transfer()
        self.assertEqual(outlet._push.call_args[0][0], counter_dict['records'],
                         'Should return all records')
Ejemplo n.º 7
0
class PrintOutlet(Outlet):
    """Outlet that logs each record's payload from its own asynchronous task."""

    async def push(self, records:[Record], update):
        """Log all ``records`` concurrently, one ``print_task`` per record.

        Args:
            records: Records to consume.
            update: Value included as a prefix in every log message.
        """
        _LOGGER.debug(f'{update} push starts')

        # One coroutine per record, all awaited together.
        await asyncio.gather(
            *(self.print_task(item, update) for item in records))

    async def print_task(self, record, update):
        """Wait half a second, then log the payload of ``record``."""
        # Stand-in for a slow operation.
        await asyncio.sleep(0.5)

        _LOGGER.debug(f'{update} consumed:{record.payload}')

# Three separate inlet instances feed one shared outlet.
random_int_inletA, random_int_inletB, random_int_inletC = (
    RandomIntInlet() for _ in range(3))
print_outlet = PrintOutlet()

link = Link(
    [random_int_inletA, random_int_inletB, random_int_inletC],
    print_outlet,
    interval=timedelta(seconds=2),
    name='async',
)

planner = SchedulePlanner(link)
planner.start()
Ejemplo n.º 8
0
 def test_add_links_on_init(self):
     """A link passed to the constructor gets a job and is listed in ``links``."""
     self.planner = SchedulePlanner(self.link, refresh_interval=0.02)
     self.assertIsNotNone(self.link.job, 'Link should contain a job')
     # assertIn reports both operands on failure, unlike assertTrue(a in b).
     self.assertIn(self.link, self.planner.links,
                   'Planner should contain the link')
Ejemplo n.º 9
0
class TestSchedulePlanner(TestCase):
    """Tests for ``SchedulePlanner`` using a ``MagicMock`` in place of a ``Link``.

    Several tests run ``planner.start`` in a daemon thread and rely on short
    sleeps, so they are timing-sensitive by design.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Limit databay's own log output to warnings and above.
        logging.getLogger('databay').setLevel(logging.WARNING)

    def setUp(self):
        """Create a fresh planner and a mocked link for every test."""
        self.planner = SchedulePlanner(refresh_interval=0.02)

        link = MagicMock(spec=Link)

        def set_job(job):
            # Record the job on the mock so tests can read ``link.job``.
            link.job = job

        link.interval.total_seconds.return_value = 0.02
        link.set_job.side_effect = set_job
        link.job = None
        link.immediate_transfer = True
        self.link = link

    def tearDown(self):
        """Remove any jobs a test left registered in ``schedule``."""
        if schedule.jobs:
            schedule.clear()

    def test__run_job(self):
        """Running a job calls the link's ``transfer`` exactly once."""
        self.planner._create_thread_pool()
        self.planner._run_job(self.link)
        self.link.transfer.assert_called_once()
        self.planner._destroy_thread_pool()

    def test__schedule(self):
        """Scheduling stores the created ``schedule`` job on the link."""
        self.planner._schedule(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        schedule_job = schedule.jobs[0]
        self.assertEqual(self.link.job, schedule_job,
                         'Link\'s job should be same as schedule\'s')
        # self.planner._unschedule(link)

    def test__unschedule(self):
        """Unscheduling clears the link's job and empties ``schedule.jobs``."""
        self.planner._schedule(self.link)
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Schedule should not have any jobs')

    def test__unschedule_invalid(self):
        """Unscheduling a link that was never scheduled leaves no job behind."""
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Scheduler should not have any jobs')

    def test_add_links(self):
        """``add_links`` gives the link a job and lists it in ``links``."""
        self.planner.add_links(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertIn(self.link, self.planner.links,
                      'Planner should contain the link')

    def test_add_links_on_init(self):
        """A link passed to the constructor is scheduled and listed."""
        self.planner = SchedulePlanner(self.link, refresh_interval=0.02)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertIn(self.link, self.planner.links,
                      'Planner should contain the link')

    def test_remove_links(self):
        """``remove_links`` clears the job and drops the link from ``links``."""
        self.planner.add_links(self.link)
        self.planner.remove_links(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertNotIn(self.link, self.planner.links,
                         'Planner should not contain the link')

    def test_remove_invalid_link(self):
        """Removing a link that was never added raises ``MissingLinkError``."""
        self.assertRaises(MissingLinkError, self.planner.remove_links,
                          self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertNotIn(self.link, self.planner.links,
                         'Planner should not contain the link')

    def test_start(self):
        """A started planner reports running and stops after ``shutdown``."""
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')
        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_shutdown(self):
        """``shutdown`` clears the running flag and the thread pool."""
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.planner.shutdown()
        self.assertFalse(self.planner._running,
                         'Planner should be not running')
        self.assertIsNone(self.planner._thread_pool,
                          'Planner should not have a thread pool')
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_add_and_run(self):
        """A started planner calls ``transfer`` on an added link."""
        self.link.interval.total_seconds.return_value = 0.02
        self.planner._refresh_interval = 0.02
        self.planner.add_links(self.link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)
        self.link.transfer.assert_called()

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_invalid_interval(self):
        """A link interval shorter than the refresh interval is rejected."""
        self.link.interval.total_seconds.return_value = 0.1
        self.planner._refresh_interval = 0.2

        self.assertRaises(ScheduleIntervalError, self.planner.add_links,
                          self.link)

    def _with_exception(self, link, ignore_exceptions):
        """Run a planner whose link always raises and check how it reacts.

        Args:
            link: Mocked link; its ``transfer`` is configured to raise
                ``DummyException`` on every call.
            ignore_exceptions: Forwarded to ``SchedulePlanner``. When true the
                planner must still be running after the failure and is shut
                down here; when false it must already have stopped.
        """
        self.planner = SchedulePlanner(ignore_exceptions=ignore_exceptions)
        self.planner.immediate_transfer = False  # otherwise planner will never start
        self.planner._refresh_interval = 0.02

        # Configure the failing link once (this setup used to be repeated).
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(link)

        with self.assertLogs(logging.getLogger('databay.BasePlanner'),
                             level='WARNING') as cm:

            th = Thread(target=self.planner.start, daemon=True)
            th.start()
            time.sleep(0.04)
            link.transfer.assert_called()

            if ignore_exceptions:
                self.assertTrue(self.planner.running,
                                'Planner should be running')
                self.planner.shutdown(wait=False)
                th.join(timeout=2)
                self.assertFalse(th.is_alive(), 'Thread should be stopped.')

            self.assertFalse(self.planner.running, 'Planner should be stopped')
            self.assertIn('I\'m a dummy exception', ';'.join(cm.output))

    def test_ignore_exception(self):
        self._with_exception(self.link, True)

    def test_raise_exception(self):
        self._with_exception(self.link, False)

    def test_uncommon_exception(self):
        """An exception with extra arguments is logged with those arguments."""
        self.link.transfer.side_effect = DummyUnusualException(argA=123,
                                                               argB=True)
        self.link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(self.link)

        with self.assertLogs(logging.getLogger('databay.BasePlanner'),
                             level='WARNING') as cm:

            th = Thread(target=self.planner.start, daemon=True)
            th.start()
            time.sleep(0.04)
            self.link.transfer.assert_called()

            self.assertFalse(self.planner.running,
                             'Scheduler should be stopped')
            self.assertIn('123, True, I\'m an unusual dummy exception',
                          ';'.join(cm.output))

    def test_purge(self):
        """``purge`` unsets the link's job and empties links and jobs."""
        self.link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(self.link)
        self.planner.purge()

        self.link.set_job.assert_called_with(None)
        self.assertEqual(self.planner.links, [])
        self.assertEqual(schedule.jobs, [])

    def test_purge_while_running(self):
        """``purge`` also works while the planner thread is running."""
        self.planner.add_links(self.link)
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.planner.purge()

        self.link.set_job.assert_called_with(None)
        self.assertEqual(self.planner.links, [])
        self.assertEqual(schedule.jobs, [])

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    # NOTE: an earlier method of the same name used to be defined above; it
    # was shadowed by this definition and therefore never ran, so only this
    # version is kept.
    def test_start_when_already_running(self):
        """A second ``_start_planner`` call returns while the first keeps running."""
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')
        th2 = Thread(target=self.planner._start_planner, daemon=True)
        th2.start()  # this shouldn't do anything as we're already running
        th2.join()
        self.assertFalse(th2.is_alive(),
                         'Second start thread should have exited.')
        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_immediate_transfer(self):
        """With a long interval, exactly one transfer happens right after start."""
        self.link.interval.total_seconds.return_value = 10
        self.planner.add_links(self.link)
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.01)
        self.link.transfer.assert_called_once()
        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_immediate_transfer_exception(self):
        """A failing first transfer is logged and the planner does not start."""
        self.link.interval.total_seconds.return_value = 10
        self.planner._ignore_exceptions = False
        self.link.transfer.side_effect = DummyException(
            'First transfer exception!')
        self.planner.add_links(self.link)
        with self.assertLogs(logging.getLogger('databay.BasePlanner'),
                             level='WARNING') as cm:
            th = Thread(target=self.planner.start, daemon=True)
            th.start()
            # Wait for the start thread before asserting; checking the mock
            # right after th.start() races with the thread.
            th.join(timeout=2)
            self.assertFalse(th.is_alive(), 'Thread should be stopped.')
            self.link.transfer.assert_called_once()
            self.assertFalse(self.planner.running,
                             'Planner should not have started')
            self.assertIn('First transfer exception!', ';'.join(cm.output))

    def test_immediate_transfer_off(self):
        """With ``immediate_transfer`` off, no transfer happens right after start."""
        self.link.interval.total_seconds.return_value = 10
        self.planner.immediate_transfer = False
        self.planner.add_links(self.link)
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.01)
        self.link.transfer.assert_not_called()
        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')
Ejemplo n.º 10
0
from datetime import timedelta

from databay import Link
from databay.inlets import RandomIntInlet
from databay.outlet import Outlet
from databay.planners import SchedulePlanner
from databay.record import Record


class ConditionalPrintOutlet(Outlet):
    """Outlet that prints only the records whose metadata opts in."""

    SHOULD_PRINT = 'ConditionalPrintOutlet.SHOULD_PRINT'
    """Whether records should be printed or skipped."""

    def push(self, records: [Record], update):
        """Print ``update`` and each record whose ``SHOULD_PRINT`` metadata is truthy.

        Records without that metadata key, or with a falsy value, are skipped.
        """
        # Lazy filter: each record is checked right before it is printed.
        opted_in = (
            item for item in records
            if item.metadata.get(self.SHOULD_PRINT)
        )
        for item in opted_in:
            print(update, item)


# Two inlets configured with opposite SHOULD_PRINT metadata. Presumably the
# inlet metadata is carried onto each record it produces (not shown here), so
# only records from `random_int_inlet_on` get printed by the outlet.
random_int_inlet_on = RandomIntInlet(
    metadata={ConditionalPrintOutlet.SHOULD_PRINT: True})
random_int_inlet_off = RandomIntInlet(
    metadata={ConditionalPrintOutlet.SHOULD_PRINT: False})

print_outlet = ConditionalPrintOutlet()

# `timedelta` comes from the standard `datetime` module and must be imported
# at the top of the file; without that import this statement raises NameError.
link = Link([random_int_inlet_on, random_int_inlet_off],
            print_outlet,
            interval=timedelta(seconds=0.5),
            name='should_print_metadata')

# The planner's refresh interval is set to the same 0.5 s as the link interval.
planner = SchedulePlanner(link, refresh_interval=0.5)
planner.start()
Ejemplo n.º 11
0
class TestSchedulePlanner(TestCase):
    """Tests for ``SchedulePlanner`` using a patched ``Link``.

    Several tests run ``planner.start`` in a daemon thread and rely on short
    sleeps, so they are timing-sensitive by design.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Limit databay's own log output to warnings and above.
        logging.getLogger('databay').setLevel(logging.WARNING)

    @patch(fqname(Link), spec=Link)
    def setUp(self, link):
        """Create a fresh planner and configure the patched link mock.

        Args:
            link: Mock injected by the ``patch`` decorator.
        """
        self.planner = SchedulePlanner(refresh_interval=0.02)

        def set_job(job):
            # Record the job on the mock so tests can read ``link.job``.
            link.job = job

        link.interval.total_seconds.return_value = 0.02
        link.set_job.side_effect = set_job
        link.job = None
        self.link = link

    def tearDown(self):
        """Remove any jobs a test left registered in ``schedule``."""
        if schedule.jobs:
            schedule.clear()

    def _silence_databay_logger(self):
        """Raise the 'databay' logger to CRITICAL for the current test only.

        The previous level is restored on cleanup so the silencing does not
        leak into tests that run afterwards.
        """
        databay_logger = logging.getLogger('databay')
        self.addCleanup(databay_logger.setLevel, databay_logger.level)
        databay_logger.setLevel(logging.CRITICAL)

    def test__run_job(self):
        """Running a job calls the link's ``transfer`` exactly once."""
        self.planner._create_thread_pool()
        self.planner._run_job(self.link)
        self.link.transfer.assert_called_once()
        self.planner._destroy_thread_pool()

    def test__schedule(self):
        """Scheduling stores the created ``schedule`` job on the link."""
        self.planner._schedule(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        schedule_job = schedule.jobs[0]
        self.assertEqual(self.link.job, schedule_job,
                         'Link\'s job should be same as schedule\'s')
        # self.planner._unschedule(link)

    def test__unschedule(self):
        """Unscheduling clears the link's job and empties ``schedule.jobs``."""
        self.planner._schedule(self.link)
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Schedule should not have any jobs')

    def test__unschedule_invalid(self):
        """Unscheduling a link that was never scheduled leaves no job behind."""
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Scheduler should not have any jobs')

    def test_add_links(self):
        """``add_links`` gives the link a job and lists it in ``links``."""
        self.planner.add_links(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertIn(self.link, self.planner.links,
                      'Planner should contain the link')

    def test_add_links_on_init(self):
        """A link passed to the constructor is scheduled and listed."""
        self.planner = SchedulePlanner(self.link, refresh_interval=0.02)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertIn(self.link, self.planner.links,
                      'Planner should contain the link')

    def test_remove_links(self):
        """``remove_links`` clears the job and drops the link from ``links``."""
        self.planner.add_links(self.link)
        self.planner.remove_links(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertNotIn(self.link, self.planner.links,
                         'Planner should not contain the link')

    def test_remove_invalid_link(self):
        """Removing a link that was never added raises ``MissingLinkError``."""
        self.assertRaises(MissingLinkError, self.planner.remove_links,
                          self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertNotIn(self.link, self.planner.links,
                         'Planner should not contain the link')

    def test_start(self):
        """A started planner reports running and stops after ``shutdown``."""
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')
        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_shutdown(self):
        """``shutdown`` clears the running flag and the thread pool."""
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.planner.shutdown()
        self.assertFalse(self.planner._running,
                         'Planner should be not running')
        self.assertIsNone(self.planner._thread_pool,
                          'Planner should not have a thread pool')
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_add_and_run(self):
        """A started planner calls ``transfer`` on an added link."""
        self.link.interval.total_seconds.return_value = 0.02
        self.planner._refresh_interval = 0.02
        self.planner.add_links(self.link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)
        self.link.transfer.assert_called()

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_invalid_interval(self):
        """A link interval shorter than the refresh interval is rejected."""
        self.link.interval.total_seconds.return_value = 0.1
        self.planner._refresh_interval = 0.2

        self.assertRaises(ScheduleIntervalError, self.planner.add_links,
                          self.link)

    def _with_exception(self, link, catch_exceptions):
        """Run a planner whose link always raises and check how it reacts.

        Args:
            link: Mocked link; its ``transfer`` is configured to raise
                ``DummyException`` on every call.
            catch_exceptions: Forwarded to ``SchedulePlanner``. When true the
                planner must still be running after the failure and is shut
                down here; when false it must already have stopped.
        """
        self._silence_databay_logger()
        self.planner = SchedulePlanner(catch_exceptions=catch_exceptions)
        self.planner._refresh_interval = 0.02

        # Configure the failing link once (this setup used to be repeated).
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)
        link.transfer.assert_called()

        if catch_exceptions:
            self.assertTrue(self.planner.running,
                            'Scheduler should be running')
            self.planner.shutdown(False)
            th.join(timeout=2)
            self.assertFalse(th.is_alive(), 'Thread should be stopped.')

        self.assertFalse(self.planner.running, 'Scheduler should be stopped')

    def test_catch_exception(self):
        self._with_exception(self.link, True)

    def test_raise_exception(self):
        self._with_exception(self.link, False)

    def test_uncommon_exception(self):
        """An exception with extra constructor arguments also stops the planner."""
        self._silence_databay_logger()

        self.link.transfer.side_effect = DummyUnusualException(argA=123,
                                                               argB=True)
        self.link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(self.link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)
        self.link.transfer.assert_called()

        self.assertFalse(self.planner.running, 'Scheduler should be stopped')