def _with_exception(self, link, ignore_exceptions):
    self.planner = SchedulePlanner(ignore_exceptions=ignore_exceptions)
    self.planner.immediate_transfer = False  # otherwise planner will never start
    link.transfer.side_effect = DummyException()
    link.interval.total_seconds.return_value = 0.02
    self.planner._refresh_interval = 0.02
    link.transfer.side_effect = DummyException()
    link.interval.total_seconds.return_value = 0.02
    self.planner.add_links(link)

    with self.assertLogs(logging.getLogger('databay.BasePlanner'), level='WARNING') as cm:
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)

        link.transfer.assert_called()
        if ignore_exceptions:
            self.assertTrue(self.planner.running, 'Planner should be running')
            self.planner.shutdown(wait=False)
            th.join(timeout=2)
            self.assertFalse(th.is_alive(), 'Thread should be stopped.')

        self.assertFalse(self.planner.running, 'Planner should be stopped')
        self.assertTrue('I\'m a dummy exception' in ';'.join(cm.output))
def setUp(self, link):
    self.planner = SchedulePlanner(refresh_interval=0.02)

    def set_job(job):
        link.job = job

    link.interval.total_seconds.return_value = 0.02
    link.set_job.side_effect = set_job
    link.job = None
    self.link = link
def setUp(self):
    self.planner = SchedulePlanner(refresh_interval=0.02)
    link = MagicMock(spec=Link)

    def set_job(job):
        link.job = job

    link.interval.total_seconds.return_value = 0.02
    link.set_job.side_effect = set_job
    link.job = None
    link.immediate_transfer = True
    self.link = link
def run(self):
    # planner = APSPlanner()
    planner = SchedulePlanner(refresh_interval=0.5)

    http_inlet = HttpInlet('https://jsonplaceholder.typicode.com/todos/1',
                           metadata={CsvOutlet.FILE_MODE: 'a'})
    file_inlet = FileInlet('output_03.csv', read_mode=FileInletMode.LINE)
    http_inlet2 = HttpInlet(
        'https://postman-echo.com/get?foo1=bar1&foo2=bar2',
        metadata={
            'MONGODB_COLLECTION': 'test_collection2',
            'csv_file': 'output_02.csv'
        })

    print_outlet = PrintOutlet(only_payload=True)
    mongo_outlet = MongoOutlet('databay', 'test_collection')
    csv_outlet = CsvOutlet('output_03.csv')

    planner.add_links(
        Link([file_inlet], [print_outlet], timedelta(seconds=0.5)))
    planner.add_links(
        Link([http_inlet, http_inlet, http_inlet], [csv_outlet], timedelta(seconds=2)))

    # planner.add_links(Link([http_inlet], [mongo_outlet], timedelta(seconds=1), name='first'))
    # planner.add_links(Link([http_inlet2, http_inlet2, http_inlet2], [mongo_outlet], timedelta(seconds=5), name='second'))
    # planner.add_links(Link([], [], timedelta(seconds=1.5)))
    # planner.add_links(Link([alphavantage_inlet], [mongo_outlet], timedelta(seconds=5)))
    # planner.add_links(Link([iex_inlet], [mongo_outlet], timedelta(seconds=5)))

    planner.start()
def _with_exception(self, link, catch_exceptions):
    logging.getLogger('databay').setLevel(logging.CRITICAL)
    self.planner = SchedulePlanner(catch_exceptions=catch_exceptions)
    link.transfer.side_effect = DummyException()
    link.interval.total_seconds.return_value = 0.02
    self.planner._refresh_interval = 0.02
    link.transfer.side_effect = DummyException()
    link.interval.total_seconds.return_value = 0.02
    self.planner.add_links(link)

    th = Thread(target=self.planner.start, daemon=True)
    th.start()
    time.sleep(0.04)

    link.transfer.assert_called()
    if catch_exceptions:
        self.assertTrue(self.planner.running, 'Scheduler should be running')
        self.planner.shutdown(False)
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    self.assertFalse(self.planner.running, 'Scheduler should be stopped')
def test_flush_after_shutdown(self, inlet, outlet):
    buffer = Buffer(count_threshold=100, time_threshold=10)
    counter_dict = {'counter': 0, 'records': []}

    link = Link(inlet, outlet, interval=0.01,
                processors=buffer, copy_records=False)
    planner = SchedulePlanner(link, refresh_interval=0.01)

    async def pull_coro(_):
        counter_dict['counter'] += 1
        record = Record(payload=counter_dict['counter'])
        counter_dict['records'].append(record)
        return [record]

    mock_pull = MagicMock(side_effect=pull_coro)
    inlet._pull = mock_pull

    th = Thread(target=planner.start, daemon=True)
    th.start()
    time.sleep(0.1)
    planner.shutdown()
    th.join()

    calls = outlet._push.call_args_list
    for c in calls:
        # first positional argument of each push call is the records list
        self.assertEqual(c[0][0], [], 'Should only contain empty record lists.')

    self.assertEqual(buffer.records, counter_dict['records'],
                     'All records should be stored in the buffer')

    planner.force_transfer()
    self.assertEqual(outlet._push.call_args[0][0], [],
                     'Should return empty record list')

    buffer.flush = True
    planner.force_transfer()
    self.assertEqual(outlet._push.call_args[0][0], counter_dict['records'],
                     'Should return all records')
import asyncio
import logging
from datetime import timedelta

from databay import Link
from databay.inlets import RandomIntInlet
from databay.outlet import Outlet
from databay.planners import SchedulePlanner
from databay.record import Record

# module logger for this snippet (logger name assumed)
_LOGGER = logging.getLogger('databay')


class PrintOutlet(Outlet):

    async def push(self, records: [Record], update):
        _LOGGER.debug(f'{update} push starts')

        # create an asynchronous task for each record
        tasks = [self.print_task(record, update) for record in records]

        # await all print tasks
        await asyncio.gather(*tasks)

    async def print_task(self, record, update):
        # simulate a long-taking operation
        await asyncio.sleep(0.5)

        # execute
        _LOGGER.debug(f'{update} consumed:{record.payload}')


random_int_inletA = RandomIntInlet()
random_int_inletB = RandomIntInlet()
random_int_inletC = RandomIntInlet()

print_outlet = PrintOutlet()

link = Link([random_int_inletA, random_int_inletB, random_int_inletC],
            print_outlet,
            interval=timedelta(seconds=2),
            name='async')

planner = SchedulePlanner(link)
planner.start()
def test_add_links_on_init(self):
    self.planner = SchedulePlanner(self.link, refresh_interval=0.02)
    self.assertIsNotNone(self.link.job, 'Link should contain a job')
    self.assertTrue(self.link in self.planner.links,
                    'Planner should contain the link')
class TestSchedulePlanner(TestCase):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        logging.getLogger('databay').setLevel(logging.WARNING)

    def setUp(self):
        self.planner = SchedulePlanner(refresh_interval=0.02)
        link = MagicMock(spec=Link)

        def set_job(job):
            link.job = job

        link.interval.total_seconds.return_value = 0.02
        link.set_job.side_effect = set_job
        link.job = None
        link.immediate_transfer = True
        self.link = link

    def tearDown(self):
        if len(schedule.jobs) > 0:
            schedule.clear()

    def test__run_job(self):
        self.planner._create_thread_pool()
        self.planner._run_job(self.link)
        self.link.transfer.assert_called_once()
        self.planner._destroy_thread_pool()

    def test__schedule(self):
        self.planner._schedule(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')

        schedule_job = schedule.jobs[0]
        self.assertEqual(self.link.job, schedule_job,
                         'Link\'s job should be same as schedule\'s')
        # self.planner._unschedule(link)

    def test__unschedule(self):
        self.planner._schedule(self.link)
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Schedule should not have any jobs')

    def test__unschedule_invalid(self):
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Scheduler should not have any jobs')

    def test_add_links(self):
        self.planner.add_links(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertTrue(self.link in self.planner.links,
                        'Planner should contain the link')

    def test_add_links_on_init(self):
        self.planner = SchedulePlanner(self.link, refresh_interval=0.02)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertTrue(self.link in self.planner.links,
                        'Planner should contain the link')

    def test_remove_links(self):
        self.planner.add_links(self.link)
        self.planner.remove_links(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertTrue(self.link not in self.planner.links,
                        'Planner should not contain the link')

    def test_remove_invalid_link(self):
        self.assertRaises(MissingLinkError,
                          self.planner.remove_links, self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertTrue(self.link not in self.planner.links,
                        'Planner should not contain the link')

    def test_start(self):
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_start_when_already_running(self):
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')

        th2 = Thread(target=self.planner.start, daemon=True)
        th2.start()
        self.assertFalse(th2.is_alive(), 'Starting again should instantly exit.')

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_shutdown(self):
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.planner.shutdown()
        self.assertFalse(self.planner._running, 'Planner should be not running')
        self.assertIsNone(self.planner._thread_pool,
                          'Planner should not have a thread pool')
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_add_and_run(self):
        self.link.interval.total_seconds.return_value = 0.02
        self.planner._refresh_interval = 0.02
        self.planner.add_links(self.link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)

        self.link.transfer.assert_called()

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_invalid_interval(self):
        self.link.interval.total_seconds.return_value = 0.1
        self.planner._refresh_interval = 0.2
        self.assertRaises(ScheduleIntervalError,
                          self.planner.add_links, self.link)

    def _with_exception(self, link, ignore_exceptions):
        self.planner = SchedulePlanner(ignore_exceptions=ignore_exceptions)
        self.planner.immediate_transfer = False  # otherwise planner will never start
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner._refresh_interval = 0.02
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(link)

        with self.assertLogs(logging.getLogger('databay.BasePlanner'), level='WARNING') as cm:
            th = Thread(target=self.planner.start, daemon=True)
            th.start()
            time.sleep(0.04)

            link.transfer.assert_called()
            if ignore_exceptions:
                self.assertTrue(self.planner.running, 'Planner should be running')
                self.planner.shutdown(wait=False)
                th.join(timeout=2)
                self.assertFalse(th.is_alive(), 'Thread should be stopped.')

            self.assertFalse(self.planner.running, 'Planner should be stopped')
            self.assertTrue('I\'m a dummy exception' in ';'.join(cm.output))

    def test_ignore_exception(self):
        self._with_exception(self.link, True)

    def test_raise_exception(self):
        self._with_exception(self.link, False)

    def test_uncommon_exception(self):
        self.link.transfer.side_effect = DummyUnusualException(argA=123, argB=True)
        self.link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(self.link)

        with self.assertLogs(logging.getLogger('databay.BasePlanner'), level='WARNING') as cm:
            th = Thread(target=self.planner.start, daemon=True)
            th.start()
            time.sleep(0.04)

            self.link.transfer.assert_called()
            self.assertFalse(self.planner.running, 'Scheduler should be stopped')
            self.assertTrue('123, True, I\'m an unusual dummy exception' in ';'.join(cm.output))

    def test_purge(self):
        self.link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(self.link)
        self.planner.purge()

        self.link.set_job.assert_called_with(None)
        self.assertEqual(self.planner.links, [])
        self.assertEqual(schedule.jobs, [])

    def test_purge_while_running(self):
        self.planner.add_links(self.link)
        th = Thread(target=self.planner.start, daemon=True)
        th.start()

        self.planner.purge()

        self.link.set_job.assert_called_with(None)
        self.assertEqual(self.planner.links, [])
        self.assertEqual(schedule.jobs, [])

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_start_when_already_running(self):
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')

        th2 = Thread(target=self.planner._start_planner, daemon=True)
        th2.start()  # this shouldn't do anything as we're already running
        th2.join()
        self.assertFalse(th2.is_alive(), 'Second start thread should have exited.')

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_immediate_transfer(self):
        self.link.interval.total_seconds.return_value = 10
        self.planner.add_links(self.link)
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.01)

        self.link.transfer.assert_called_once()

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_immediate_transfer_exception(self):
        self.link.interval.total_seconds.return_value = 10
        self.planner._ignore_exceptions = False
        self.link.transfer.side_effect = DummyException(
            'First transfer exception!')
        self.planner.add_links(self.link)

        with self.assertLogs(logging.getLogger('databay.BasePlanner'), level='WARNING') as cm:
            th = Thread(target=self.planner.start, daemon=True)
            th.start()

            self.link.transfer.assert_called_once()
            self.assertFalse(self.planner.running, 'Planner should not have started')

            th.join(timeout=2)
            self.assertFalse(th.is_alive(), 'Thread should be stopped.')
            self.assertTrue('First transfer exception!' in ';'.join(cm.output))

    def test_immediate_transfer_off(self):
        self.link.interval.total_seconds.return_value = 10
        self.planner.immediate_transfer = False
        self.planner.add_links(self.link)
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.01)

        self.link.transfer.assert_not_called()

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')
from datetime import timedelta

from databay import Link
from databay.inlets import RandomIntInlet
from databay.outlet import Outlet
from databay.planners import SchedulePlanner
from databay.record import Record


class ConditionalPrintOutlet(Outlet):

    SHOULD_PRINT = 'ConditionalPrintOutlet.SHOULD_PRINT'
    """Whether records should be printed or skipped."""

    def push(self, records: [Record], update):
        for record in records:
            if record.metadata.get(self.SHOULD_PRINT):
                print(update, record)


random_int_inlet_on = RandomIntInlet(
    metadata={ConditionalPrintOutlet.SHOULD_PRINT: True})
random_int_inlet_off = RandomIntInlet(
    metadata={ConditionalPrintOutlet.SHOULD_PRINT: False})

print_outlet = ConditionalPrintOutlet()

link = Link([random_int_inlet_on, random_int_inlet_off],
            print_outlet,
            interval=timedelta(seconds=0.5),
            name='should_print_metadata')

planner = SchedulePlanner(link, refresh_interval=0.5)
planner.start()
class TestSchedulePlanner(TestCase):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        logging.getLogger('databay').setLevel(logging.WARNING)

    @patch(fqname(Link), spec=Link)
    def setUp(self, link):
        self.planner = SchedulePlanner(refresh_interval=0.02)

        def set_job(job):
            link.job = job

        link.interval.total_seconds.return_value = 0.02
        link.set_job.side_effect = set_job
        link.job = None
        self.link = link

    def tearDown(self):
        if len(schedule.jobs) > 0:
            schedule.clear()

    def test__run_job(self):
        self.planner._create_thread_pool()
        self.planner._run_job(self.link)
        self.link.transfer.assert_called_once()
        self.planner._destroy_thread_pool()

    def test__schedule(self):
        self.planner._schedule(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')

        schedule_job = schedule.jobs[0]
        self.assertEqual(self.link.job, schedule_job,
                         'Link\'s job should be same as schedule\'s')
        # self.planner._unschedule(link)

    def test__unschedule(self):
        self.planner._schedule(self.link)
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Schedule should not have any jobs')

    def test__unschedule_invalid(self):
        self.planner._unschedule(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertEqual(len(schedule.jobs), 0,
                         'Scheduler should not have any jobs')

    def test_add_links(self):
        self.planner.add_links(self.link)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertTrue(self.link in self.planner.links,
                        'Planner should contain the link')

    def test_add_links_on_init(self):
        self.planner = SchedulePlanner(self.link, refresh_interval=0.02)
        self.assertIsNotNone(self.link.job, 'Link should contain a job')
        self.assertTrue(self.link in self.planner.links,
                        'Planner should contain the link')

    def test_remove_links(self):
        self.planner.add_links(self.link)
        self.planner.remove_links(self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertTrue(self.link not in self.planner.links,
                        'Planner should not contain the link')

    def test_remove_invalid_link(self):
        self.assertRaises(MissingLinkError,
                          self.planner.remove_links, self.link)
        self.assertIsNone(self.link.job, 'Link should not contain a job')
        self.assertTrue(self.link not in self.planner.links,
                        'Planner should not contain the link')

    def test_start(self):
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.assertTrue(self.planner._running, 'Planner should be running')

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_shutdown(self):
        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        self.planner.shutdown()
        self.assertFalse(self.planner._running, 'Planner should be not running')
        self.assertIsNone(self.planner._thread_pool,
                          'Planner should not have a thread pool')
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_add_and_run(self):
        self.link.interval.total_seconds.return_value = 0.02
        self.planner._refresh_interval = 0.02
        self.planner.add_links(self.link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)

        self.link.transfer.assert_called()

        self.planner.shutdown()
        th.join(timeout=2)
        self.assertFalse(th.is_alive(), 'Thread should be stopped.')

    def test_invalid_interval(self):
        self.link.interval.total_seconds.return_value = 0.1
        self.planner._refresh_interval = 0.2
        self.assertRaises(ScheduleIntervalError,
                          self.planner.add_links, self.link)

    def _with_exception(self, link, catch_exceptions):
        logging.getLogger('databay').setLevel(logging.CRITICAL)
        self.planner = SchedulePlanner(catch_exceptions=catch_exceptions)
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner._refresh_interval = 0.02
        link.transfer.side_effect = DummyException()
        link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)

        link.transfer.assert_called()
        if catch_exceptions:
            self.assertTrue(self.planner.running, 'Scheduler should be running')
            self.planner.shutdown(False)
            th.join(timeout=2)
            self.assertFalse(th.is_alive(), 'Thread should be stopped.')

        self.assertFalse(self.planner.running, 'Scheduler should be stopped')

    def test_catch_exception(self):
        self._with_exception(self.link, True)

    def test_raise_exception(self):
        self._with_exception(self.link, False)

    def test_uncommon_exception(self):
        logging.getLogger('databay').setLevel(logging.CRITICAL)
        self.link.transfer.side_effect = DummyUnusualException(argA=123, argB=True)
        self.link.interval.total_seconds.return_value = 0.02
        self.planner.add_links(self.link)

        th = Thread(target=self.planner.start, daemon=True)
        th.start()
        time.sleep(0.04)

        self.link.transfer.assert_called()
        self.assertFalse(self.planner.running, 'Scheduler should be stopped')
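# The tests above reference DummyException and DummyUnusualException helpers
# that are not defined anywhere in this listing. The sketch below is only a
# hypothetical stand-in inferred from how the tests construct and assert on
# them (constructor arguments and the message strings checked against
# cm.output); the real databay test utilities may differ.
class DummyException(Exception):
    """Plain exception; default message matches the 'I'm a dummy exception'
    assertion (message assumed)."""

    def __init__(self, message="I'm a dummy exception"):
        super().__init__(message)


class DummyUnusualException(Exception):
    """Exception built from keyword arguments rather than a single message,
    matching DummyUnusualException(argA=123, argB=True) and the
    '123, True, I'm an unusual dummy exception' assertion (format assumed)."""

    def __init__(self, argA, argB):
        super().__init__(f"{argA}, {argB}, I'm an unusual dummy exception")
        self.argA = argA
        self.argB = argB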