def run_task(self, source, date, weeks, offset=None, statuses=None):
        """
        Execute EnrollmentsByWeek with fake input and output targets.

        ``source``, ``offset`` and ``statuses`` are text snippets; each one is
        dedented, stripped and has every space replaced by a tab before being
        wrapped in a FakeTarget.  ``offset`` and ``statuses`` are only wired in
        as inputs when they are truthy.

        Returns:
            a pandas dataframe parsed from the task output, indexed by
            ``course_id``, with '-' read as a missing value.
        """

        def to_tsv(text):
            """Convert a space-separated snippet into tab-separated text."""
            return textwrap.dedent(text).strip().replace(' ', '\t')

        run_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

        # The task only gets an offsets location when offset data was supplied.
        offsets_location = 'fake_offsets' if offset else None
        task = EnrollmentsByWeek(
            name='fake_name',
            src='fake_source',
            offsets=offsets_location,
            destination='fake_destination',
            date=run_date,
            weeks=weeks,
        )

        # Fake inputs: the source is always present, the others are optional.
        fake_inputs = {'source': FakeTarget(to_tsv(source))}
        for key, text in (('offsets', offset), ('statuses', statuses)):
            if text:
                fake_inputs[key] = FakeTarget(to_tsv(text))
        task.input = MagicMock(return_value=fake_inputs)

        # Fake output: capture whatever the task writes.
        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)

        task.run()

        # Parse the captured output into a dataframe.
        written = fake_output.buffer.read()
        return pandas.read_csv(StringIO(written),
                               na_values=['-'],
                               index_col='course_id')
Exemple #2
0
    def run_task(self, source, date, weeks, offset=None, statuses=None):
        """
        Drive EnrollmentsByWeek using fake targets in place of real ones.

        The ``source``, ``offset`` and ``statuses`` arguments are text
        snippets that get dedented, stripped and converted to tab-separated
        form before being handed to a FakeTarget.  The ``offsets`` and
        ``statuses`` inputs are only provided when the matching argument is
        truthy.

        Returns:
            the task output loaded into a pandas dataframe indexed by
            ``course_id``; '-' cells are treated as missing values.
        """

        def tabify(snippet):
            """Dedent and strip the snippet, then swap spaces for tabs."""
            dedented = textwrap.dedent(snippet)
            return dedented.strip().replace(' ', '\t')

        when = datetime.datetime.strptime(date, '%Y-%m-%d').date()

        # Pass offsets=None to the task unless offset data was given.
        task_kwargs = {
            'name': 'fake_name',
            'src': ['fake_source'],
            'offsets': 'fake_offsets' if offset else None,
            'destination': 'fake_destination',
            'date': when,
            'weeks': weeks,
        }
        task = EnrollmentsByWeek(**task_kwargs)

        # Stub the inputs; optional ones are added only when specified.
        stub_inputs = {'source': FakeTarget(tabify(source))}
        if offset:
            stub_inputs['offsets'] = FakeTarget(tabify(offset))
        if statuses:
            stub_inputs['statuses'] = FakeTarget(tabify(statuses))
        task.input = MagicMock(return_value=stub_inputs)

        # Stub the output so the written data can be read back.
        stub_output = FakeTarget()
        task.output = MagicMock(return_value=stub_output)

        task.run()

        raw_output = stub_output.buffer.read()
        frame = pandas.read_csv(StringIO(raw_output),
                                na_values=['-'],
                                index_col='course_id')
        return frame
    def test_task_urls(self):
        """Check the target types built for the offsets and destination URLs."""
        # The month is written as 1, not 01: a leading-zero integer literal is
        # a syntax error on Python 3 (and an octal literal on Python 2).
        date = datetime.date(2013, 1, 20)

        task = EnrollmentsByWeek(name='fake_name',
                                 src='s3://bucket/path/',
                                 offsets='s3://bucket/file.txt',
                                 destination='file://path/file.txt',
                                 date=date)

        requires = task.requires()

        # The source target is resolved but not inspected; the call still has
        # to succeed for the test to pass.
        requires['source'].output()

        # The offsets requirement is expected to yield a plain-format HDFS target.
        offsets = requires['offsets'].output()
        self.assertIsInstance(offsets, luigi.hdfs.HdfsTarget)
        self.assertEqual(offsets.format, luigi.hdfs.Plain)

        # The file:// destination is expected to yield a luigi.File target.
        destination = task.output()
        self.assertIsInstance(destination, luigi.File)
Exemple #4
0
    def test_task_urls(self):
        """Check the target types built when ``src`` is given as a list of URLs."""
        # The month is written as 1, not 01: a leading-zero integer literal is
        # a syntax error on Python 3 (and an octal literal on Python 2).
        date = datetime.date(2013, 1, 20)

        task = EnrollmentsByWeek(name='fake_name',
                                 src=['s3://bucket/path/'],
                                 offsets='s3://bucket/file.txt',
                                 destination='file://path/file.txt',
                                 date=date)

        requires = task.requires()

        # The source target is resolved but not inspected; the call still has
        # to succeed for the test to pass.
        requires['source'].output()

        # The offsets requirement is expected to yield a plain-format HDFS target.
        offsets = requires['offsets'].output()
        self.assertIsInstance(offsets, luigi.hdfs.HdfsTarget)
        self.assertEqual(offsets.format, luigi.hdfs.Plain)

        # The file:// destination is expected to yield a luigi.File target.
        destination = task.output()
        self.assertIsInstance(destination, luigi.File)