Example #1
    def test_connection_error(self, emails):
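        # 'tld.invalid' can never resolve, so every RPC to the scheduler will fail
        # and worker.add() eventually raises RPCError.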
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)

        self.waits = 0
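        # dummy_wait replaces the scheduler's retry back-off so the test counts
        # retries instead of sleeping between attempts.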

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        with Worker(scheduler=sch) as worker:
            try:
                worker.add(a)
            except RPCError:
                self.assertEqual(self.waits,
                                 2)  # should attempt to add it 3 times
                self.assertNotEqual(emails, [])
                self.assertTrue(emails[0].find(
                    "Luigi: Framework error while scheduling %s" %
                    (a, )) != -1)
            else:
                self.fail()
Example #2
    def test_connection_error(self, emails):
        sch = RemoteScheduler(host="this_host_doesnt_exist",
                              port=1337,
                              connect_timeout=1)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(emails, [])
        self.assertTrue(
            emails[0].find("Luigi: Framework error while scheduling %s" %
                           (a, )) != -1)
        worker.stop()
Example #3
    def test_workflow(self):
        # set up directories:
        src_path = os.path.join(self.temp_rootdir, "src")
        os.mkdir(src_path)
        counts_path = os.path.join(self.temp_rootdir, "counts")
        os.mkdir(counts_path)
        report_path = os.path.join(self.temp_rootdir, "report.csv")
        data_filepath = os.path.join(self.temp_rootdir, "geoloc.dat")
        with open(data_filepath, 'w') as data_file:
            data_file.write("Dummy geolocation data.")

        # create input:
        log_filepath = os.path.join(src_path, "tracking.log")
        with open(log_filepath, 'w') as log_file:
            log_file.write(self._create_event_log_line())
            log_file.write('\n')
            log_file.write(
                self._create_event_log_line(username="******",
                                            ip=FakeGeoLocation.ip_address_2))
            log_file.write('\n')

        end_date = '2014-04-01'
        task = UsersPerCountryReportWorkflow(
            mapreduce_engine='local',
            name='test',
            src=[src_path],
            end_date=datetime.datetime.strptime(end_date, '%Y-%m-%d').date(),
            geolocation_data=data_filepath,
            counts=counts_path,
            report=report_path,
        )
        worker = luigi.worker.Worker()
        worker.add(task)
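        # Replace pygeoip in the user_location module so the workflow reads
        # FakeGeoLocation data instead of a real GeoIP database.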
        with patch(
                'edx.analytics.tasks.user_location.pygeoip') as mock_pygeoip:
            mock_pygeoip.GeoIP = Mock(return_value=FakeGeoLocation())
            worker.run()
        worker.stop()

        output_lines = []
        with open(report_path) as report_file:
            output_lines = report_file.readlines()

        self.assertEqual(len(output_lines), 3)
        self.assertEqual(output_lines[0].strip('\n'),
                         UsersPerCountryReport.create_header(end_date))
        expected = UsersPerCountryReport.create_csv_entry(
            0.5, 1, FakeGeoLocation.country_name_1,
            FakeGeoLocation.country_code_1)
        self.assertEqual(output_lines[1].strip('\n'), expected)
        expected = UsersPerCountryReport.create_csv_entry(
            0.5, 1, FakeGeoLocation.country_name_2,
            FakeGeoLocation.country_code_2)
        self.assertEqual(output_lines[2].strip('\n'), expected)
Example #4
    def test_workflow(self):
        # set up directories:
        src_path = os.path.join(self.temp_rootdir, "src")
        os.mkdir(src_path)
        counts_path = os.path.join(self.temp_rootdir, "counts")
        os.mkdir(counts_path)
        report_path = os.path.join(self.temp_rootdir, "report.csv")
        data_filepath = os.path.join(self.temp_rootdir, "geoloc.dat")
        with open(data_filepath, 'w') as data_file:
            data_file.write("Dummy geolocation data.")

        # create input:
        log_filepath = os.path.join(src_path, "tracking.log")
        with open(log_filepath, 'w') as log_file:
            log_file.write(self._create_event_log_line())
            log_file.write('\n')
            log_file.write(self._create_event_log_line(username="******", ip=FakeGeoLocation.ip_address_2))
            log_file.write('\n')

        end_date = '2014-04-01'
        task = UsersPerCountryReportWorkflow(
            mapreduce_engine='local',
            name='test',
            src=[src_path],
            end_date=datetime.datetime.strptime(end_date, '%Y-%m-%d').date(),
            geolocation_data=data_filepath,
            counts=counts_path,
            report=report_path,
        )
        worker = luigi.worker.Worker()
        worker.add(task)
        with patch('edx.analytics.tasks.user_location.pygeoip') as mock_pygeoip:
            mock_pygeoip.GeoIP = Mock(return_value=FakeGeoLocation())
            worker.run()
        worker.stop()

        output_lines = []
        with open(report_path) as report_file:
            output_lines = report_file.readlines()

        self.assertEqual(len(output_lines), 3)
        self.assertEqual(output_lines[0].strip('\n'), UsersPerCountryReport.create_header(end_date))
        expected = UsersPerCountryReport.create_csv_entry(
            0.5, 1, FakeGeoLocation.country_name_1, FakeGeoLocation.country_code_1
        )
        self.assertEqual(output_lines[1].strip('\n'), expected)
        expected = UsersPerCountryReport.create_csv_entry(
            0.5, 1, FakeGeoLocation.country_name_2, FakeGeoLocation.country_code_2
        )
        self.assertEqual(output_lines[2].strip('\n'), expected)
Example #5
    def test_drop(self):
        """
        Create a schema in the database and use a :py:class:`PgDropTask` to
        remove it.
        """
        url = self.pgdb.url()
        # Manually create the test schema.
        create_schema(url=url, schema=self.test_schema)
        # Verify the test schema exists.
        self.assertTrue(schema_exists(url=url, schema=self.test_schema))
        # Run the task to drop the schema.
        worker = luigi.worker.Worker()
        worker.add(PgDropSchemaTask(url=url, schema=self.test_schema))
        worker.run()
        # Verify the schema is gone.
        self.assertFalse(schema_exists(url=url, schema=self.test_schema))
Example #6
def _run_namespace_tasks(namespace, tasks=None, worker_scheduler_factory=None, override_defaults=None):
    """
    :param namespace:
    :param worker_scheduler_factory:
    :param override_defaults:
    :return: True if all tasks and their dependencies were successfully run (or already completed);
             False if any error occurred.
    """

    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)
    # search for logging configuration path first on the command line, then
    # in the application config file
    logging_conf = env_params.logging_conf_file
    if logging_conf != '' and not os.path.exists(logging_conf):
        raise Exception(
            "Error: Unable to locate specified logging configuration file!"
        )

    if not configuration.get_config().getboolean(
            'core', 'no_configure_logging', False):
        setup_interface_logging(logging_conf, env_params.log_level)

    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
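    # Acquire the PID lock before scheduling anything; with take_lock set,
    # SIGUSR1 is sent to the current holder so this run can take the lock over.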
    if (not env_params.no_lock and
            not(lock.acquire_for(env_params.lock_pid_dir, env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()
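    # A namespace run always talks to a remote scheduler; the local scheduler
    # is rejected below.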

    if env_params.local_scheduler:
        raise RuntimeError('Cannot use namespace with local scheduler.')
    else:
        if env_params.scheduler_url != '':
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_namespace_worker(
        scheduler=sch, worker_processes=env_params.workers, namespace=namespace, assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        if namespace == 'init':
            for t in tasks:
                success &= worker.add(
                    t, env_params.parallel_scheduling, env_params.parallel_scheduling_processes)
            logger.info('Done scheduling tasks')
        else:
            logger.info('Worker setup for "{}" namespace'.format(namespace))
        success &= worker.run()
    logger.info(execution_summary.summary(worker))
    return dict(success=success, worker=worker)
Example #7
def _schedule_and_run(tasks, worker_scheduler_factory=None, override_defaults=None):
    """
    :param tasks:
    :param worker_scheduler_factory:
    :param override_defaults:
    :return: True if all tasks and their dependencies were successfully run (or already completed);
             False if any error occurred.
    """

    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)
    # search for logging configuration path first on the command line, then
    # in the application config file
    logging_conf = env_params.logging_conf_file
    if logging_conf is not None and not os.path.exists(logging_conf):
        raise Exception(
            "Error: Unable to locate specified logging configuration file!"
        )

    if not configuration.get_config().getboolean(
            'core', 'no_configure_logging', False):
        setup_interface_logging(logging_conf)

    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and
            not(lock.acquire_for(env_params.lock_pid_dir, env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        sch = worker_scheduler_factory.create_local_scheduler()
    else:
        if env_params.scheduler_url is not None:
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_worker(
        scheduler=sch, worker_processes=env_params.workers, assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        for t in tasks:
            success &= worker.add(t, env_params.parallel_scheduling)
        logger.info('Done scheduling tasks')
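        # workers == 0 means schedule-only: tasks are registered with the
        # scheduler but nothing is executed locally.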
        if env_params.workers != 0:
            success &= worker.run()
    logger.info(execution_summary.summary(worker))
    return dict(success=success, worker=worker)
Example #8
def _schedule_and_run(tasks,
                      worker_scheduler_factory=None,
                      override_defaults=None):
    """
    :param tasks:
    :param worker_scheduler_factory:
    :param override_defaults:
    :return: True if all tasks and their dependencies were successfully run (or already completed);
             False if any error occurred. It will return a detailed response of type LuigiRunResult
             instead of a boolean if detailed_summary=True.
    """

    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)

    InterfaceLogging.setup(env_params)

    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and not (lock.acquire_for(
            env_params.lock_pid_dir, env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        sch = worker_scheduler_factory.create_local_scheduler()
    else:
        if env_params.scheduler_url != '':
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_worker(
        scheduler=sch,
        worker_processes=env_params.workers,
        assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        for t in tasks:
            success &= worker.add(t, env_params.parallel_scheduling,
                                  env_params.parallel_scheduling_processes)
        logger.info('Done scheduling tasks')
        success &= worker.run()
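    # Summarise the outcome as a LuigiRunResult and close the scheduler
    # connection if the scheduler supports it.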
    luigi_run_result = LuigiRunResult(worker, success)
    logger.info(luigi_run_result.summary_text)
    if hasattr(sch, 'close'):
        sch.close()
    return luigi_run_result
Example #9
    def test_connection_error(self, emails):
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        with Worker(scheduler=sch) as worker:
            worker.add(a)
            self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
            self.assertNotEqual(emails, [])
            self.assertTrue(emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1)
Example #10
    def test_connection_error(self):
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(self.last_email, None)
        worker.add(a)
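        # _wait is stubbed above, so a failed add waits (attempts - 1) times
        # between the scheduler's retry attempts.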
        self.assertEqual(self.waits, sch._attempts - 1)
        self.assertNotEqual(self.last_email, None)
        self.assertEqual(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,))
Example #11
    def test_connection_error(self, emails):
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337, connect_timeout=1)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(emails, [])
        self.assertTrue(emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1)
        worker.stop()
Example #12
    def test_connection_error(self):
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(self.last_email, None)
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(self.last_email, None)
        self.assertEqual(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,))
        worker.stop()
Example #13
def _schedule_and_run(tasks, worker_scheduler_factory=None, override_defaults=None):
    """
    :param tasks:
    :param worker_scheduler_factory:
    :param override_defaults:
    :return: True if all tasks and their dependencies were successfully run (or already completed);
             False if any error occurred. It will return a detailed response of type LuigiRunResult
             instead of a boolean if detailed_summary=True.
    """

    if worker_scheduler_factory is None:
        worker_scheduler_factory = _WorkerSchedulerFactory()
    if override_defaults is None:
        override_defaults = {}
    env_params = core(**override_defaults)

    InterfaceLogging.setup(env_params)

    kill_signal = signal.SIGUSR1 if env_params.take_lock else None
    if (not env_params.no_lock and
            not(lock.acquire_for(env_params.lock_pid_dir, env_params.lock_size, kill_signal))):
        raise PidLockAlreadyTakenExit()

    if env_params.local_scheduler:
        sch = worker_scheduler_factory.create_local_scheduler()
    else:
        if env_params.scheduler_url != '':
            url = env_params.scheduler_url
        else:
            url = 'http://{host}:{port:d}/'.format(
                host=env_params.scheduler_host,
                port=env_params.scheduler_port,
            )
        sch = worker_scheduler_factory.create_remote_scheduler(url=url)

    worker = worker_scheduler_factory.create_worker(
        scheduler=sch, worker_processes=env_params.workers, assistant=env_params.assistant)

    success = True
    logger = logging.getLogger('luigi-interface')
    with worker:
        for t in tasks:
            success &= worker.add(t, env_params.parallel_scheduling, env_params.parallel_scheduling_processes)
        logger.info('Done scheduling tasks')
        success &= worker.run()
    luigi_run_result = LuigiRunResult(worker, success)
    logger.info(luigi_run_result.summary_text)
    return luigi_run_result
Example #14
    def test_arrange_act_assert(self, _):
        worker = luigi.worker.Worker()
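        # {{cookiecutter.task_name}} is a cookiecutter placeholder that is
        # rendered when a project is generated from this template.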
        worker.add({{cookiecutter.task_name}}())
        worker.run()
        self.assertEqual(True, True)