def create_task(self,
                credentials=None,
                source=None,
                insert_chunk_size=100,
                overwrite=False,
                cls=InsertToMysqlDummyTable):
        """
        Build a task of type ``cls`` whose ``input()`` returns fake targets.

        Args:
            credentials: text served by the fake 'credentials' input; a canned
                JSON document is used when this is falsy.
            source: text served by the fake 'insert_source' input; falls back
                to ``self._get_source_string(1)`` when falsy.
            insert_chunk_size: forwarded to the task constructor.
            overwrite: forwarded to the task constructor.
            cls: the task class to instantiate.

        Returns:
            the new task, with ``task.input`` replaced by a mock.
        """
        default_credentials = '''\
                {
                    "host": "db.example.com",
                    "port": "3306",
                    "username": "******",
                    "password": "******"
                }'''

        # Flush luigi's instance cache first so the constructor call below
        # creates a new task object.
        luigi.task.Register.clear_instance_cache()
        task = cls(
            credentials=sentinel.ignored,
            insert_chunk_size=insert_chunk_size,
            overwrite=overwrite,
        )

        credentials_text = credentials or default_credentials
        source_text = source or self._get_source_string(1)

        # Both texts are dedented before being handed to the fake targets.
        task.input = MagicMock(return_value={
            'credentials': FakeTarget(value=textwrap.dedent(credentials_text)),
            'insert_source': FakeTarget(value=textwrap.dedent(source_text)),
        })
        return task
Example #2
0
    def run_task(self, registrations, enrollments, date, weeks, blacklist=None):
        """
        Execute WeeklyIncrementalUsersAndEnrollments against fake targets.

        Args:
            registrations: space separated fixture text, or None for a default.
            enrollments: space separated fixture text, or None for a default.
            date: the report date as a 'YYYY-MM-DD' string.
            weeks: forwarded to the task constructor.
            blacklist: optional fixture text; only mocked when truthy.

        Returns:
            the task output as a pandas dataframe.
        """

        def as_tsv(text):
            """Dedent and strip the text, then replace every space with a tab."""
            return textwrap.dedent(text).strip().replace(' ', '\t')

        report_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

        task = WeeklyIncrementalUsersAndEnrollments(
            name="fake_name",
            destination='fake_destination',
            date=report_date,
            weeks=weeks,
            blacklist=blacklist
        )

        # Substitute canned fixtures for inputs the caller left out.
        if registrations is None:
            registrations = """
                2013-01-01 10
                2013-01-10 20
                """
        if enrollments is None:
            enrollments = """
                course_1 2013-01-06 10
                course_1 2013-01-14 10
                """

        fake_inputs = {
            'enrollments': FakeTarget(value=as_tsv(enrollments)),
            'registrations': FakeTarget(value=as_tsv(registrations)),
        }
        # The blacklist input exists only when one was supplied.
        if blacklist:
            fake_inputs['blacklist'] = FakeTarget(value=as_tsv(blacklist))
        task.input = MagicMock(return_value=fake_inputs)

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)

        task.run()

        # Parse what the task wrote; '-' cells are read as missing values.
        written = fake_output.buffer.read()
        return pandas.read_csv(StringIO(written),
                               na_values=['-'],
                               index_col=self.row_label('header'))
    def run_task(self, source, date, weeks, offset=None, statuses=None):
        """
        Execute EnrollmentsByWeek against fake targets.

        Args:
            source: space separated fixture text for the 'source' input.
            date: the report date as a 'YYYY-MM-DD' string.
            weeks: forwarded to the task constructor.
            offset: optional fixture text; only mocked when truthy.
            statuses: optional fixture text; only mocked when truthy.

        Returns:
            the task output as a pandas dataframe indexed by 'course_id'.
        """

        def as_tsv(text):
            """Dedent and strip the text, then replace every space with a tab."""
            return textwrap.dedent(text).strip().replace(' ', '\t')

        report_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

        # The task only gets an offsets parameter when an offset fixture exists.
        offsets_param = 'fake_offsets' if offset else None
        task = EnrollmentsByWeek(
            name='fake_name',
            src=['fake_source'],
            offsets=offsets_param,
            destination='fake_destination',
            date=report_date,
            weeks=weeks,
        )

        fake_inputs = {'source': FakeTarget(value=as_tsv(source))}
        # Optional inputs are mocked only when the caller supplied them.
        for input_name, fixture in (('offsets', offset), ('statuses', statuses)):
            if fixture:
                fake_inputs[input_name] = FakeTarget(value=as_tsv(fixture))
        task.input = MagicMock(return_value=fake_inputs)

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)

        task.run()

        # Parse what the task wrote; '-' cells are read as missing values.
        written = fake_output.buffer.read()
        return pandas.read_csv(StringIO(written),
                               na_values=['-'],
                               index_col='course_id')
    def run_task(self, credentials=None, query=None):
        """
        Run a throwaway MysqlSelectTask subclass against fake targets.

        Args:
            credentials: text served by the fake 'credentials' input; a canned
                JSON document is used when this is falsy.
            query: value returned by the task's ``query`` property;
                'SELECT 1' is used when this is falsy.

        Returns:
            the task output parsed by ``read_csv`` (tab separated, no header
            row), or None if parsing raises ValueError.
        """
        credentials_text = credentials or '''\
                {
                    "host": "db.example.com",
                    "port": "3306",
                    "username": "******",
                    "password": "******"
                }'''
        selected_query = query or 'SELECT 1'

        class TestTask(MysqlSelectTask):
            """MysqlSelectTask stub that serves the query chosen by the enclosing call."""

            database = "exampledata"

            @property
            def query(self):
                return selected_query

            @property
            def filename(self):
                return None  # pragma: no cover

        task = TestTask(credentials=sentinel.ignored,
                        destination=sentinel.ignored)
        task.input = MagicMock(return_value={
            'credentials': FakeTarget(value=textwrap.dedent(credentials_text)),
        })

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)

        task.run()

        try:
            return read_csv(fake_output.buffer,
                            header=None,
                            sep="\t",
                            na_values=['-'],
                            encoding='utf-8')
        except ValueError:
            # Output that read_csv rejects is reported to the caller as None.
            return None
Example #5
0
    def create_task(self):
        """
        Create ``self.task`` from ``self.task_class`` and initialize it locally.

        The task's ``input_local`` is mocked to serve a fixed explicit event
        list, ``init_local()`` is called, and the geoip lookup and user info
        requirements are replaced by fakes.
        """
        self.task = self.task_class(
            output_root='/fake/output',
            auth_user_path='/fake/input1',
            auth_userprofile_path='/fake/input2',
            explicit_event_whitelist='explicit_events.tsv',
        )

        # Fixture text; it is passed through self.reformat() before use.
        explicit_event_list = """
            admin browser edx.instructor.report.downloaded
            admin server add-forum-admin

            admin server add-forum-community-TA
            admin server add-forum-mod
            admin server add-instructor
            admin server list-staff
            enrollment server edx.course.enrollment.activated
            # problem server problem_rescore
        """
        events_target = FakeTarget(value=self.reformat(explicit_event_list))
        self.task.input_local = MagicMock(
            return_value={'explicit_events': events_target})
        self.task.init_local()

        # Swap in fakes for the remaining collaborators.
        self.task.geoip = FakeGeoLocation()
        self.task.user_info_requirements = get_mock_user_info_requirements()
 def setUp(self):
     """Create the PaypalTransactionsByDayTask under test with a fake output target."""
     run_date = luigi.DateParameter().parse(self.DEFAULT_DATE)
     self.task = PaypalTransactionsByDayTask(
         date=run_date,
         output_root='/fake/output',
         account_id='testing',
     )

     # Keep the fake target on the test case so tests can inspect the output.
     self.output_target = FakeTarget()
     self.task.output = MagicMock(return_value=self.output_target)
Example #7
0
    def run_task(self, source):
        """
        Run TotalEventsReport over ``source`` and return the parsed report.

        ``source`` is dedented, stripped and tab separated, then written to
        ``self.input_file``, which the task receives as its ``counts``
        parameter.

        Returns:
            a pandas dataframe with the columns 'date' and 'count'.
        """
        with open(self.input_file, 'w') as counts_file:
            # Make the fixture look like a TSV: every space becomes a tab.
            counts_file.write(
                textwrap.dedent(source).strip().replace(' ', '\t'))

        task = TotalEventsReport(counts=self.input_file,
                                 report='fake_report')

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)
        task.run()

        # The output has no header row, so the column names are given here.
        written = fake_output.buffer.read()
        return pandas.read_csv(StringIO(written),
                               na_values=['-'],
                               index_col=False,
                               header=None,
                               names=['date', 'count'])
    def initialize_task(self, metric_ranges):
        """
        Serve the metric ranges as the task's local input, then call init_local().

        Each range contributes one line, produced by its
        ``to_separated_values()`` method; the lines are served under the
        'range_data' key.
        """
        serialized = [
            metric_range.to_separated_values() for metric_range in metric_ranges
        ]
        range_target = FakeTarget(value='\n'.join(serialized))

        self.task.input_local = MagicMock(
            return_value={'range_data': range_target})
        self.task.init_local()
Example #9
0
    def run_task(self, task, credentials=None):
        """
        Emulate execution of a Sqoop import from Mysql.

        Args:
            task: the task to run; its ``input`` and ``metadata_output`` are
                replaced by mocks before ``run()`` is called.
            credentials: text served by the fake 'credentials' input; a canned
                JSON document is used when this is falsy.
        """
        credentials_text = credentials or '''\
                {
                    "host": "db.example.com",
                    "port": "3306",
                    "username": "******",
                    "password": "******"
                }'''

        credentials_target = FakeTarget(value=textwrap.dedent(credentials_text))
        task.input = MagicMock(return_value={'credentials': credentials_target})

        # The metadata output is redirected to a fake target.
        task.metadata_output = MagicMock(return_value=FakeTarget())

        task.run()
Example #10
0
    def run_task(self, task_cls, source):
        """
        Run a ``task_cls`` task over ``source`` using fake targets.

        Args:
            task_cls: task class to instantiate; every constructor parameter
                is given ``sentinel.ignored``.
            source: value of the single fake target served under 'data'.

        Returns:
            whatever reading the fake output target's buffer yields.
        """
        ignored = sentinel.ignored
        task = task_cls(
            course=ignored,
            output_directory=ignored,
            data_directory=ignored,
            auth_user_path=ignored,
            auth_userprofile_path=ignored,
        )

        # The 'data' input is a list holding one fake target.
        task.input = MagicMock(
            return_value={'data': [FakeTarget(value=source)]})

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)

        task.user_info_requirements = get_mock_user_info_requirements()
        reset_user_info_for_testing()

        task.run()
        return fake_output.buffer.read()
Example #11
0
    def create_task(self, credentials=None):
        """
        Build a RunVerticaSqlScriptTask wired to fake inputs and a mock output.

        Args:
            credentials: text served by the fake 'credentials' input; a canned
                JSON document is used when this is falsy.

        Returns:
            the task, with ``input`` mocked to serve the fake targets and
            ``output`` mocked to return ``self.mock_vertica_connector``.
        """
        default_credentials = '''\
                {
                    "host": "db.example.com",
                    "port": 5433,
                    "user": "******",
                    "password": "******"
                }'''

        # Flush the instance cache so a new task object is created.
        luigi.task.Register.clear_instance_cache()
        task = RunVerticaSqlScriptTask(
            credentials=sentinel.ignored,
            script_name='my simple script',
            source_script=sentinel.ignored,
        )

        credentials_text = credentials or default_credentials

        # This SQL is only served as the fake 'source_script' input; real SQL
        # is used purely to provide context.
        script_text = '''
        DELETE TABLE my_schema.my_table;
        CREATE TABLE my_schema.my_table AS SELECT foo, bar, baz FROM my_schema.another_table;
        '''

        task.input = MagicMock(return_value={
            'credentials': FakeTarget(value=textwrap.dedent(credentials_text)),
            'source_script': FakeTarget(value=textwrap.dedent(script_text)),
        })

        # The task's output is the mock connector, configured with marker names.
        connector = self.mock_vertica_connector
        connector.marker_schema = "name_of_marker_schema"
        connector.marker_table = "name_of_marker_table"
        task.output = MagicMock(return_value=connector)
        return task
    def _create_export_task(self, **kwargs):
        """
        Create an EventExportTask with fixed test parameters.

        Any keyword arguments are forwarded to the task constructor in
        addition to the fixed ones. The task's ``input_local`` is mocked to
        return a fake target holding ``self.CONFIGURATION``.
        """
        export_task = EventExportTask(
            mapreduce_engine='local',
            output_root='test://output/',
            config='test://config/default.yaml',
            source=['test://input/'],
            environment='prod',
            interval=Year.parse('2014'),
            gpg_key_dir='test://config/gpg-keys/',
            gpg_master_key='*****@*****.**',
            **kwargs
        )

        config_target = FakeTarget(value=self.CONFIGURATION)
        export_task.input_local = MagicMock(return_value=config_target)
        return export_task
Example #13
0
    def create_task(self,
                    credentials=None,
                    source=None,
                    overwrite=False,
                    cls=CopyToVerticaDummyTable):
        """
        Build a task of type ``cls`` wired to fake inputs and a mock output.

        Args:
            credentials: text served by the fake 'credentials' input; a canned
                JSON document is used when this is falsy.
            source: text served by the fake 'insert_source' input; falls back
                to ``self._get_source_string(1)`` when falsy.
            overwrite: forwarded to the task constructor.
            cls: the task class to instantiate.

        Returns:
            the task, with ``input`` mocked to serve the fake targets and
            ``output`` mocked to return ``self.mock_vertica_connector``.
        """
        default_credentials = '''\
                {
                    "host": "db.example.com",
                    "port": 5433,
                    "user": "******",
                    "password": "******"
                }'''

        # Flush the instance cache so a new task object is created.
        luigi.task.Register.clear_instance_cache()
        task = cls(credentials=sentinel.ignored, overwrite=overwrite)

        credentials_text = credentials or default_credentials
        source_text = source or self._get_source_string(1)

        task.input = MagicMock(return_value={
            'credentials': FakeTarget(value=textwrap.dedent(credentials_text)),
            'insert_source': FakeTarget(value=textwrap.dedent(source_text)),
        })

        # The task's output is the mock connector, configured with marker names.
        connector = self.mock_vertica_connector
        connector.marker_schema = "name_of_marker_schema"
        connector.marker_table = "name_of_marker_table"
        task.output = MagicMock(return_value=connector)
        return task
    def setUp(self):
        """
        Prepare the task under test and the event fixtures used by the tests.

        Relies on the parent ``setUp`` to provide ``self.task`` (presumably
        built from ``self.task_class`` -- confirm against the base class).
        """
        self.task_class = EventTypeDistributionTask

        self.events_list = """
        admin browser edx.instructor.report.downloaded
        admin server add-forum-admin

        admin server add-forum-community-TA
        admin server add-forum-mod
        admin server add-instructor
        """
        events_target = FakeTarget(value=self.reformat(self.events_list))

        super(EventTypeDistributionTaskMapTest, self).setUp()

        # Serve the events list as the task's local input and load it.
        self.task.events_list_file_path = "fake_path"
        self.task.input_local = MagicMock(return_value=events_target)
        self.task.init_local()

        # Values shared by the event template and the expected key.
        self.event_date = '2013-12-17'
        self.event_type = "test_event"
        self.event_source = "browser"
        self.event_category = "unknown"
        self.exported = False

        event_context = {
            "course_id": "course_id",
            "org_id": "org_id",
            "user_id": "user_id",
        }
        event_payload = {
            "course_id": "course_id",
            "user_id": "user_id",
            "mode": "honor",
        }
        self.event_templates = {
            'event': {
                "username": "******",
                "host": "test_host",
                "event_source": self.event_source,
                "event_type": self.event_type,
                "context": event_context,
                "time": self.event_date,
                "ip": "127.0.0.1",
                "event": event_payload,
            }
        }
        self.default_event_template = 'event'

        self.expected_key = (
            self.event_date,
            self.event_category,
            self.event_type,
            self.event_source,
            self.exported,
        )
    def generate_table(self, interval):
        """
        Run a CalendarTask over ``interval`` and return its output as a table.

        Returns:
            a list with one entry per output line, each entry being the list
            of that line's tab separated fields.
        """
        captured = FakeTarget()

        class TestCalendarTask(CalendarTask):
            """CalendarTask whose output is the in-memory target created above."""
            output_root = None

            def output(self):
                return captured

        TestCalendarTask(interval=interval).run()

        output_lines = captured.buffer.getvalue().splitlines()
        return [output_line.split('\t') for output_line in output_lines]
Example #16
0
    def run_task(self, source):
        """
        Helper utility for running the task under test.

        Each item of ``source`` is written as one JSON document per line to
        ``catalog_test.json``, which is served to CourseSubjectTask as its
        input. The task output is captured in a fake target.

        Args:
            source: iterable of JSON-serializable objects.

        Returns:
            the task output as a pandas dataframe with the columns
            'course_id', 'date', 'subject_uri', 'subject_title' and
            'subject_language'.
        """
        self.input_file = "catalog_test.json"
        with open(self.input_file, 'w') as fle:
            for line in source:
                # json.dumps() emits ASCII-only text by default, so it can be
                # written to the text-mode file as is. Encoding it to bytes
                # first raises TypeError on Python 3.
                fle.write(json.dumps(line) + '\n')
        fake_warehouse_path = self.input_dir

        task = CourseSubjectTask(
            warehouse_path=fake_warehouse_path,
            # Plain decimal literal: a leading zero ("06") is a syntax error
            # on Python 3.
            date=datetime.date(2015, 6, 25),
        )

        output_target = FakeTarget()
        task.output = MagicMock(return_value=output_target)

        class DummyInput(object):
            """A dummy input object to imitate the input to a luigi task."""
            def __init__(self, filename):
                self.filename = filename

            def open(self, mode):
                """Opens the file this object is mocking a past task as having output."""
                return open(self.filename, mode)

        input_dummy = DummyInput(self.input_file)
        task.input = MagicMock(return_value=input_dummy)
        task.run()

        # The output has no header row, so the column names are given here.
        results = pandas.read_table(output_target.buffer,
                                    sep='\t',
                                    header=None,
                                    names=[
                                        'course_id', 'date', 'subject_uri',
                                        'subject_title', 'subject_language'
                                    ])
        return results
Example #17
0
    def run_task(self):
        """
        Run CourseContentTask over an archive of ``self.archive_root``.

        The archive is a gzipped tarball written to a temporary file. The
        task's output archive is extracted into a temporary directory that is
        removed on cleanup, and ``self.output_course_root`` is set to the
        course directory inside it.
        """
        extract_root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, extract_root)

        with tempfile.NamedTemporaryFile() as packed_input:
            # Pack the course tree into the temporary file, then rewind it.
            with tarfile.open(mode='w:gz', fileobj=packed_input) as tarball:
                tarball.add(self.archive_root, arcname='')
            packed_input.seek(0)

            ignored = sentinel.ignored
            task = obfuscate.CourseContentTask(
                course=ignored,
                output_directory=ignored,
                data_directory=ignored,
                auth_user_path=ignored,
                auth_userprofile_path=ignored,
            )

            # The task reads the archive through a real local target.
            task.input = MagicMock(
                return_value={'data': [LocalTarget(path=packed_input.name)]})

            fake_output = FakeTarget()
            task.output = MagicMock(return_value=fake_output)

            task.user_info_requirements = get_mock_user_info_requirements()
            reset_user_info_for_testing()
            task.run()

            # Unpack what the task wrote so tests can inspect the files.
            with tarfile.open(mode='r:gz',
                              fileobj=fake_output.buffer) as produced:
                produced.extractall(extract_root)

        course_dirname = get_filename_safe_course_id(self.COURSE_ID)
        self.output_course_root = os.path.join(extract_root, course_dirname)
Example #18
0
    def run_task(self,
                 registrations,
                 enrollments,
                 date,
                 weeks,
                 offset=None,
                 history=None,
                 blacklist=None):
        """
        Execute WeeklyAllUsersAndEnrollments against fake targets.

        Args:
            registrations: space separated fixture text.
            enrollments: space separated fixture text, or None for a default.
            date: the report date as a 'YYYY-MM-DD' string.
            weeks: forwarded to the task constructor.
            offset: optional fixture text; only mocked when truthy.
            history: optional fixture text; only mocked when truthy.
            blacklist: optional fixture text; only mocked when truthy.

        Returns:
            the task output as a pandas dataframe.
        """

        def as_tsv(text):
            """Dedent and strip the text, then replace every space with a tab."""
            return textwrap.dedent(text).strip().replace(' ', '\t')

        report_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

        # The offsets and history parameters are only set when the matching
        # fixture was supplied.
        offsets_param = 'fake_offsets' if offset else None
        history_param = 'fake_history' if history else None
        task = WeeklyAllUsersAndEnrollments(
            name='fake_name',
            n_reduce_tasks="fake_n_reduce_tasks",
            offsets=offsets_param,
            history=history_param,
            destination='fake_destination',
            date=report_date,
            weeks=weeks,
            credentials=None,
            blacklist=blacklist,
        )

        if enrollments is None:
            enrollments = """
                course_1 2013-03-01 1
                course_1 2013-03-30 2
                course_2 2013-03-07 1
                course_2 2013-03-08 1
                course_2 2013-03-10 1
                course_2 2013-03-13 1
                course_3 2013-03-15 1
                course_3 2013-03-18 1
                course_3 2013-03-19 1
                """

        fake_inputs = {
            'enrollments': FakeTarget(value=as_tsv(enrollments)),
            'registrations': FakeTarget(value=as_tsv(registrations))
        }

        # Optional inputs are mocked only when the caller supplied them.
        optional_fixtures = (
            ('offsets', offset),
            ('history', history),
            ('blacklist', blacklist),
        )
        for input_name, fixture in optional_fixtures:
            if fixture:
                fake_inputs[input_name] = FakeTarget(value=as_tsv(fixture))

        task.input = MagicMock(return_value=fake_inputs)

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)

        task.run()

        # Parse what the task wrote; '-' cells are read as missing values.
        written = fake_output.buffer.read()
        return pandas.read_csv(StringIO(written),
                               na_values=['-'],
                               index_col=self.row_label('header'))