def run_task(self, source, date, weeks, offset=None, statuses=None):
    """
    Drive an EnrollmentsByWeek task end to end using in-memory fake targets.

    Arguments:
        source: text for the 'source' input; it is dedented, stripped and has
            every space turned into a tab before being handed to the task.
        date: day passed to the task, given as a 'YYYY-MM-DD' string.
        weeks: forwarded unchanged as the task's `weeks` parameter.
        offset: optional text for the 'offsets' input, converted like `source`.
            When falsy, the task gets offsets=None and no 'offsets' input.
        statuses: optional text for the 'statuses' input, converted like
            `source`.  When falsy, no 'statuses' input is provided.

    Returns:
        The task output parsed as a pandas dataframe indexed by 'course_id',
        with '-' cells read as missing values.
    """
    def to_tsv(text):
        """Dedent and strip the text, then replace each space with a tab."""
        dedented = textwrap.dedent(text)
        return dedented.strip().replace(' ', '\t')

    run_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # The task is only given an offsets location when offsets were supplied.
    if offset:
        offsets_location = 'fake_offsets'
    else:
        offsets_location = None

    task = EnrollmentsByWeek(
        name='fake_name',
        src='fake_source',
        offsets=offsets_location,
        destination='fake_destination',
        date=run_date,
        weeks=weeks,
    )

    # Replace the task's inputs with fakes; optional ones only when supplied.
    fake_inputs = {'source': FakeTarget(to_tsv(source))}
    if offset:
        fake_inputs['offsets'] = FakeTarget(to_tsv(offset))
    if statuses:
        fake_inputs['statuses'] = FakeTarget(to_tsv(statuses))
    task.input = MagicMock(return_value=fake_inputs)

    # Capture whatever the task writes in a fake output target.
    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    task.run()

    written = fake_output.buffer.read()
    return pandas.read_csv(
        StringIO(written),
        na_values=['-'],
        index_col='course_id',
    )
def run_task(self, source, date, weeks, offset=None, statuses=None):
    """
    Drive an EnrollmentsByWeek task end to end using in-memory fake targets.

    Arguments:
        source: text for the 'source' input; it is dedented, stripped and has
            every space turned into a tab before being handed to the task.
        date: day passed to the task, given as a 'YYYY-MM-DD' string.
        weeks: forwarded unchanged as the task's `weeks` parameter.
        offset: optional text for the 'offsets' input, converted like `source`.
            When falsy, the task gets offsets=None and no 'offsets' input.
        statuses: optional text for the 'statuses' input, converted like
            `source`.  When falsy, no 'statuses' input is provided.

    Returns:
        The task output parsed as a pandas dataframe indexed by 'course_id',
        with '-' cells read as missing values.
    """
    def to_tsv(text):
        """Dedent and strip the text, then replace each space with a tab."""
        dedented = textwrap.dedent(text)
        return dedented.strip().replace(' ', '\t')

    run_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # The task is only given an offsets location when offsets were supplied.
    if offset:
        offsets_location = 'fake_offsets'
    else:
        offsets_location = None

    # `src` is passed as a one-element list here.
    task = EnrollmentsByWeek(
        name='fake_name',
        src=['fake_source'],
        offsets=offsets_location,
        destination='fake_destination',
        date=run_date,
        weeks=weeks,
    )

    # Replace the task's inputs with fakes; optional ones only when supplied.
    fake_inputs = {'source': FakeTarget(to_tsv(source))}
    if offset:
        fake_inputs['offsets'] = FakeTarget(to_tsv(offset))
    if statuses:
        fake_inputs['statuses'] = FakeTarget(to_tsv(statuses))
    task.input = MagicMock(return_value=fake_inputs)

    # Capture whatever the task writes in a fake output target.
    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    task.run()

    written = fake_output.buffer.read()
    return pandas.read_csv(
        StringIO(written),
        na_values=['-'],
        index_col='course_id',
    )
def test_task_urls(self):
    """
    Check the target types the task builds from its URL parameters.

    The 'offsets' requirement must output a plain-format HDFS target, and the
    task's own output must be a luigi.File.
    """
    # Written as 1 rather than 01: a leading-zero integer literal is a
    # SyntaxError on Python 3, and both spell the same value on Python 2.
    date = datetime.date(2013, 1, 20)

    task = EnrollmentsByWeek(
        name='fake_name',
        src='s3://bucket/path/',
        offsets='s3://bucket/file.txt',
        destination='file://path/file.txt',
        date=date,
    )

    requires = task.requires()

    # The source target is still built, but nothing is asserted about it,
    # so the result is not bound to a name.
    requires['source'].output()

    offsets = requires['offsets'].output()
    self.assertIsInstance(offsets, luigi.hdfs.HdfsTarget)
    self.assertEqual(offsets.format, luigi.hdfs.Plain)

    destination = task.output()
    self.assertIsInstance(destination, luigi.File)
def test_task_urls(self):
    """
    Check the target types the task builds from its URL parameters.

    The task is created with `src` as a one-element list.  The 'offsets'
    requirement must output a plain-format HDFS target, and the task's own
    output must be a luigi.File.
    """
    # Written as 1 rather than 01: a leading-zero integer literal is a
    # SyntaxError on Python 3, and both spell the same value on Python 2.
    date = datetime.date(2013, 1, 20)

    task = EnrollmentsByWeek(
        name='fake_name',
        src=['s3://bucket/path/'],
        offsets='s3://bucket/file.txt',
        destination='file://path/file.txt',
        date=date,
    )

    requires = task.requires()

    # The source target is still built, but nothing is asserted about it,
    # so the result is not bound to a name.
    requires['source'].output()

    offsets = requires['offsets'].output()
    self.assertIsInstance(offsets, luigi.hdfs.HdfsTarget)
    self.assertEqual(offsets.format, luigi.hdfs.Plain)

    destination = task.output()
    self.assertIsInstance(destination, luigi.File)