def test_connection_error(self, emails):
    """Scheduling against an unreachable scheduler should retry, raise, and email."""
    sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)

    # Count retry waits instead of actually sleeping.
    self.waits = 0

    def dummy_wait():
        self.waits += 1

    sch._wait = dummy_wait

    class A(DummyTask):
        pass

    a = A()
    self.assertEqual(emails, [])
    with Worker(scheduler=sch) as worker:
        try:
            worker.add(a)
        except RPCError:
            # Two waits separate the three add attempts.
            self.assertEqual(self.waits, 2)
            self.assertNotEqual(emails, [])
            expected_subject = "Luigi: Framework error while scheduling %s" % (a,)
            self.assertTrue(emails[0].find(expected_subject) != -1)
        else:
            self.fail()
def test_connection_error(self, emails):
    """Adding a task against an unreachable scheduler retries and sends an error email."""
    sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337, connect_timeout=1)
    worker = Worker(scheduler=sch)

    # Count retry waits instead of actually sleeping.
    self.waits = 0

    def dummy_wait():
        self.waits += 1

    sch._wait = dummy_wait

    class A(DummyTask):
        pass

    a = A()
    self.assertEqual(emails, [])
    worker.add(a)
    self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
    # Fixed: assertNotEquals is a deprecated alias (removed in Python 3.12).
    self.assertNotEqual(emails, [])
    self.assertTrue(
        emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1)
    worker.stop()
def test_workflow(self):
    """End-to-end run of the users-per-country report workflow on a tiny log."""
    # set up directories:
    src_path = os.path.join(self.temp_rootdir, "src")
    os.mkdir(src_path)
    counts_path = os.path.join(self.temp_rootdir, "counts")
    os.mkdir(counts_path)
    report_path = os.path.join(self.temp_rootdir, "report.csv")
    data_filepath = os.path.join(self.temp_rootdir, "geoloc.dat")
    with open(data_filepath, 'w') as data_file:
        data_file.write("Dummy geolocation data.")
    # create input: two events from two distinct users/IPs
    log_filepath = os.path.join(src_path, "tracking.log")
    with open(log_filepath, 'w') as log_file:
        log_file.write(self._create_event_log_line())
        log_file.write('\n')
        log_file.write(
            self._create_event_log_line(username="******",
                                        ip=FakeGeoLocation.ip_address_2))
        log_file.write('\n')
    end_date = '2014-04-01'
    task = UsersPerCountryReportWorkflow(
        mapreduce_engine='local',
        name='test',
        src=[src_path],
        end_date=datetime.datetime.strptime(end_date, '%Y-%m-%d').date(),
        geolocation_data=data_filepath,
        counts=counts_path,
        report=report_path,
    )
    worker = luigi.worker.Worker()
    worker.add(task)
    # Stub out pygeoip so no real geolocation database is needed.
    with patch(
            'edx.analytics.tasks.user_location.pygeoip') as mock_pygeoip:
        mock_pygeoip.GeoIP = Mock(return_value=FakeGeoLocation())
        worker.run()
    worker.stop()
    output_lines = []
    with open(report_path) as report_file:
        output_lines = report_file.readlines()
    # Fixed: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(len(output_lines), 3)
    self.assertEqual(output_lines[0].strip('\n'),
                     UsersPerCountryReport.create_header(end_date))
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_1, FakeGeoLocation.country_code_1)
    self.assertEqual(output_lines[1].strip('\n'), expected)
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_2, FakeGeoLocation.country_code_2)
    self.assertEqual(output_lines[2].strip('\n'), expected)
def test_workflow(self):
    """End-to-end run of the users-per-country report workflow on a tiny log."""
    # set up directories:
    src_path = os.path.join(self.temp_rootdir, "src")
    os.mkdir(src_path)
    counts_path = os.path.join(self.temp_rootdir, "counts")
    os.mkdir(counts_path)
    report_path = os.path.join(self.temp_rootdir, "report.csv")
    data_filepath = os.path.join(self.temp_rootdir, "geoloc.dat")
    with open(data_filepath, 'w') as data_file:
        data_file.write("Dummy geolocation data.")
    # create input: two events from two distinct users/IPs
    log_filepath = os.path.join(src_path, "tracking.log")
    with open(log_filepath, 'w') as log_file:
        log_file.write(self._create_event_log_line())
        log_file.write('\n')
        log_file.write(self._create_event_log_line(
            username="******", ip=FakeGeoLocation.ip_address_2))
        log_file.write('\n')
    end_date = '2014-04-01'
    task = UsersPerCountryReportWorkflow(
        mapreduce_engine='local',
        name='test',
        src=[src_path],
        end_date=datetime.datetime.strptime(end_date, '%Y-%m-%d').date(),
        geolocation_data=data_filepath,
        counts=counts_path,
        report=report_path,
    )
    worker = luigi.worker.Worker()
    worker.add(task)
    # Stub out pygeoip so no real geolocation database is needed.
    with patch('edx.analytics.tasks.user_location.pygeoip') as mock_pygeoip:
        mock_pygeoip.GeoIP = Mock(return_value=FakeGeoLocation())
        worker.run()
    worker.stop()
    output_lines = []
    with open(report_path) as report_file:
        output_lines = report_file.readlines()
    # Fixed: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(len(output_lines), 3)
    self.assertEqual(output_lines[0].strip('\n'),
                     UsersPerCountryReport.create_header(end_date))
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_1, FakeGeoLocation.country_code_1
    )
    self.assertEqual(output_lines[1].strip('\n'), expected)
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_2, FakeGeoLocation.country_code_2
    )
    self.assertEqual(output_lines[2].strip('\n'), expected)
def test_drop(self):
    """
    Create a schema in the database and use a :py:class:`PgDropTask` to
    remove it.
    """
    url = self.pgdb.url()

    # Arrange: create the schema by hand and confirm it is present.
    create_schema(url=url, schema=self.test_schema)
    self.assertTrue(schema_exists(url=url, schema=self.test_schema))

    # Act: let a worker run the drop task.
    drop_worker = luigi.worker.Worker()
    drop_worker.add(PgDropSchemaTask(url=url, schema=self.test_schema))
    drop_worker.run()

    # Assert: the schema no longer exists.
    self.assertFalse(schema_exists(url=url, schema=self.test_schema))
def _run_namespace_tasks(namespace, tasks=None, worker_scheduler_factory=None,
                         override_defaults=None):
    """
    Schedule and run tasks for the given *namespace* on a remote scheduler.

    :param namespace: namespace label; ``'init'`` schedules *tasks* before
        running, any other value only runs the worker.
    :param tasks: iterable of task instances to add when namespace is ``'init'``.
    :param worker_scheduler_factory: factory used to build the scheduler and
        worker; defaults to :class:`_WorkerSchedulerFactory`.
    :param override_defaults: dict of ``core`` parameter overrides.
    :return: dict with ``success`` (True if all tasks and their dependencies
        were successfully run or already completed; False on any error) and
        the ``worker`` instance.
    :raises Exception: if the configured logging configuration file is missing.
    :raises PidLockAlreadyTakenExit: if the pid lock cannot be acquired.
    :raises RuntimeError: if a local scheduler is requested (unsupported here).
    """
    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)
    # search for logging configuration path first on the command line, then
    # in the application config file
    logging_conf = env_params.logging_conf_file
    if logging_conf != '' and not os.path.exists(logging_conf):
        raise Exception(
            "Error: Unable to locate specified logging configuration file!"
        )

    # Honor the 'no_configure_logging' opt-out before touching logging setup.
    if not configuration.get_config().getboolean(
            'core', 'no_configure_logging', False):
        setup_interface_logging(logging_conf, env_params.log_level)

    # SIGUSR1 lets this process take the pid lock from the current holder.
    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and
            not(lock.acquire_for(env_params.lock_pid_dir,
                                 env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        # Namespace runs require a central scheduler.
        raise RuntimeError('Cannot use namespace with local scheduler.')
    else:
        # Explicit scheduler URL wins over host/port assembly.
        if env_params.scheduler_url != '':
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_namespace_worker(
        scheduler=sch, worker_processes=env_params.workers,
        namespace=namespace, assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        if namespace == 'init':
            # Only the 'init' namespace schedules tasks; others just run.
            for t in tasks:
                success &= worker.add(
                    t, env_params.parallel_scheduling,
                    env_params.parallel_scheduling_processes)
            logger.info('Done scheduling tasks')
        else:
            logger.info('Worker setup for "{}" namespace'.format(namespace))
        success &= worker.run()
    logger.info(execution_summary.summary(worker))
    return dict(success=success, worker=worker)
def _schedule_and_run(tasks, worker_scheduler_factory=None, override_defaults=None):
    """
    Schedule *tasks* on a local or remote scheduler and run them with a worker.

    :param tasks: iterable of task instances to schedule.
    :param worker_scheduler_factory: factory used to build the scheduler and
        worker; defaults to :class:`_WorkerSchedulerFactory`.
    :param override_defaults: dict of ``core`` parameter overrides.
    :return: dict with ``success`` (True if all tasks and their dependencies
        were successfully run or already completed; False if any error
        occurred) and the ``worker`` instance.
    :raises Exception: if the configured logging configuration file is missing.
    :raises PidLockAlreadyTakenExit: if the pid lock cannot be acquired.
    """
    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)
    # search for logging configuration path first on the command line, then
    # in the application config file
    logging_conf = env_params.logging_conf_file
    if logging_conf is not None and not os.path.exists(logging_conf):
        raise Exception(
            "Error: Unable to locate specified logging configuration file!"
        )

    # Honor the 'no_configure_logging' opt-out before touching logging setup.
    if not configuration.get_config().getboolean(
            'core', 'no_configure_logging', False):
        setup_interface_logging(logging_conf)

    # SIGUSR1 lets this process take the pid lock from the current holder.
    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and
            not(lock.acquire_for(env_params.lock_pid_dir,
                                 env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        sch = worker_scheduler_factory.create_local_scheduler()
    else:
        # Explicit scheduler URL wins over host/port assembly.
        if env_params.scheduler_url is not None:
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_worker(
        scheduler=sch, worker_processes=env_params.workers,
        assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        for t in tasks:
            success &= worker.add(t, env_params.parallel_scheduling)
        logger.info('Done scheduling tasks')
        # workers == 0 means schedule-only: do not execute anything locally.
        if env_params.workers != 0:
            success &= worker.run()
    logger.info(execution_summary.summary(worker))
    return dict(success=success, worker=worker)
def _schedule_and_run(tasks, worker_scheduler_factory=None, override_defaults=None):
    """
    Schedule *tasks* on a local or remote scheduler and run them with a worker.

    :param tasks: iterable of task instances to schedule.
    :param worker_scheduler_factory: factory used to build the scheduler and
        worker; defaults to :class:`_WorkerSchedulerFactory`.
    :param override_defaults: dict of ``core`` parameter overrides.
    :return: True if all tasks and their dependencies were successfully run (or already completed);
             False if any error occurred. It will return a detailed response of type LuigiRunResult
             instead of a boolean if detailed_summary=True.
    :raises PidLockAlreadyTakenExit: if the pid lock cannot be acquired.
    """
    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)

    InterfaceLogging.setup(env_params)

    # SIGUSR1 lets this process take the pid lock from the current holder.
    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and
            not (lock.acquire_for(
                env_params.lock_pid_dir, env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        sch = worker_scheduler_factory.create_local_scheduler()
    else:
        # Explicit scheduler URL wins over host/port assembly.
        if env_params.scheduler_url != '':
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_worker(
        scheduler=sch, worker_processes=env_params.workers,
        assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        for t in tasks:
            success &= worker.add(t, env_params.parallel_scheduling,
                                  env_params.parallel_scheduling_processes)
        logger.info('Done scheduling tasks')
        success &= worker.run()
    luigi_run_result = LuigiRunResult(worker, success)
    logger.info(luigi_run_result.summary_text)
    # Release scheduler resources when the scheduler supports it.
    if hasattr(sch, 'close'):
        sch.close()
    return luigi_run_result
def test_connection_error(self, emails):
    """An unreachable scheduler should trigger retries and an error email."""
    sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)

    # Record retry waits rather than sleeping.
    self.waits = 0

    def dummy_wait():
        self.waits += 1

    sch._wait = dummy_wait

    class A(DummyTask):
        pass

    a = A()
    self.assertEqual(emails, [])
    with Worker(scheduler=sch) as worker:
        worker.add(a)
    # Two waits separate the three add attempts.
    self.assertEqual(self.waits, 2)
    self.assertNotEqual(emails, [])
    subject = "Luigi: Framework error while scheduling %s" % (a,)
    self.assertTrue(emails[0].find(subject) != -1)
def test_connection_error(self):
    """Adding a task against an unreachable scheduler retries and sends an error email."""
    sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
    worker = Worker(scheduler=sch)

    # Count retry waits instead of actually sleeping.
    self.waits = 0

    def dummy_wait():
        self.waits += 1

    sch._wait = dummy_wait

    class A(DummyTask):
        pass

    a = A()
    # Fixed: assertEquals/assertNotEquals are deprecated aliases
    # (removed in Python 3.12).
    self.assertEqual(self.last_email, None)
    worker.add(a)
    # One wait between each pair of consecutive attempts.
    self.assertEqual(self.waits, sch._attempts - 1)
    self.assertNotEqual(self.last_email, None)
    self.assertEqual(self.last_email[0],
                     "Luigi: Framework error while scheduling %s" % (a,))
def test_connection_error(self, emails):
    """Adding a task against an unreachable scheduler retries and sends an error email."""
    sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337, connect_timeout=1)
    worker = Worker(scheduler=sch)

    # Count retry waits instead of actually sleeping.
    self.waits = 0

    def dummy_wait():
        self.waits += 1

    sch._wait = dummy_wait

    class A(DummyTask):
        pass

    a = A()
    self.assertEqual(emails, [])
    worker.add(a)
    self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
    # Fixed: assertNotEquals is a deprecated alias (removed in Python 3.12).
    self.assertNotEqual(emails, [])
    self.assertTrue(
        emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1)
    worker.stop()
def test_connection_error(self):
    """Adding a task against an unreachable scheduler retries and sends an error email."""
    sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
    worker = Worker(scheduler=sch)

    # Count retry waits instead of actually sleeping.
    self.waits = 0

    def dummy_wait():
        self.waits += 1

    sch._wait = dummy_wait

    class A(DummyTask):
        pass

    a = A()
    # Fixed: assertEquals/assertNotEquals are deprecated aliases
    # (removed in Python 3.12).
    self.assertEqual(self.last_email, None)
    worker.add(a)
    self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
    self.assertNotEqual(self.last_email, None)
    self.assertEqual(self.last_email[0],
                     "Luigi: Framework error while scheduling %s" % (a,))
    worker.stop()
def _schedule_and_run(tasks, worker_scheduler_factory=None, override_defaults=None):
    """
    Schedule *tasks* on a local or remote scheduler and run them with a worker.

    :param tasks: iterable of task instances to schedule.
    :param worker_scheduler_factory: factory used to build the scheduler and
        worker; defaults to :class:`_WorkerSchedulerFactory`.
    :param override_defaults: dict of ``core`` parameter overrides.
    :return: True if all tasks and their dependencies were successfully run (or already completed);
             False if any error occurred. It will return a detailed response of type LuigiRunResult
             instead of a boolean if detailed_summary=True.
    :raises PidLockAlreadyTakenExit: if the pid lock cannot be acquired.
    """
    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)

    InterfaceLogging.setup(env_params)

    # SIGUSR1 lets this process take the pid lock from the current holder.
    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and
            not(lock.acquire_for(env_params.lock_pid_dir,
                                 env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        sch = worker_scheduler_factory.create_local_scheduler()
    else:
        # Explicit scheduler URL wins over host/port assembly.
        if env_params.scheduler_url != '':
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_worker(
        scheduler=sch, worker_processes=env_params.workers,
        assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        for t in tasks:
            success &= worker.add(t, env_params.parallel_scheduling,
                                  env_params.parallel_scheduling_processes)
        logger.info('Done scheduling tasks')
        success &= worker.run()
    luigi_run_result = LuigiRunResult(worker, success)
    logger.info(luigi_run_result.summary_text)
    return luigi_run_result
def test_arrange_act_assert(self, _):
    # NOTE(review): {{cookiecutter.task_name}} is a cookiecutter template
    # placeholder; this body is rendered at project-generation time.
    # Arrange: a worker with the templated task scheduled.
    worker = luigi.worker.Worker()
    worker.add({{cookiecutter.task_name}}())
    # Act: run the worker to completion.
    worker.run()
    # Assert: placeholder assertion — replace with a real outcome check.
    self.assertEqual(True, True)