def run_task(self, source, date, weeks, offset=None, statuses=None):
    """
    Execute an EnrollmentsByWeek task against in-memory fake targets.

    Arguments:
        source: text for the 'source' input.  It is dedented, stripped and
            has every space replaced by a tab before being wrapped in a
            FakeTarget.
        date: the task date as a 'YYYY-MM-DD' string.
        weeks: passed straight through to the task's `weeks` parameter.
        offset: optional text for the 'offsets' input, reformatted like
            `source`.  When falsy the task is built with offsets=None and
            no 'offsets' input is mocked.
        statuses: optional text for the 'statuses' input, reformatted like
            `source`.  When falsy no 'statuses' input is mocked.

    Returns:
        the task output parsed into a pandas dataframe indexed by
        'course_id', with '-' cells read as missing values.
    """
    def to_tsv(text):
        """Turn an indented, space-separated literal into TSV text."""
        return textwrap.dedent(text).strip().replace(' ', '\t')

    task_date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # The task only gets an offsets location when offset data was supplied.
    offsets_location = 'fake_offsets' if offset else None
    task = EnrollmentsByWeek(
        name='fake_name',
        src='fake_source',
        offsets=offsets_location,
        destination='fake_destination',
        date=task_date,
        weeks=weeks,
    )

    # 'source' is always mocked; the optional inputs only when provided.
    fake_inputs = {'source': FakeTarget(to_tsv(source))}
    for input_name, raw_text in (('offsets', offset), ('statuses', statuses)):
        if raw_text:
            fake_inputs[input_name] = FakeTarget(to_tsv(raw_text))
    task.input = MagicMock(return_value=fake_inputs)

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    # Run the task, then read back whatever it wrote to the fake output.
    task.run()
    written = fake_output.buffer.read()
    return pandas.read_csv(StringIO(written),
                           na_values=['-'],
                           index_col='course_id')
def test_task_urls(self):
    """
    Check the target types EnrollmentsByWeek produces for URL parameters.

    With an s3:// `offsets` URL the required 'offsets' task must output a
    `luigi.hdfs.HdfsTarget` in `luigi.hdfs.Plain` format, and with a file://
    `destination` URL the task output must be a `luigi.File`.
    """
    # Plain decimal month: the old `01` spelling is an octal literal in
    # Python 2 and a SyntaxError in Python 3.
    date = datetime.date(2013, 1, 20)

    task = EnrollmentsByWeek(name='fake_name',
                             src='s3://bucket/path/',
                             offsets='s3://bucket/file.txt',
                             destination='file://path/file.txt',
                             date=date)

    requires = task.requires()

    # The source output is resolved but not asserted on; the call still
    # fails the test if it raises.
    requires['source'].output()

    offsets = requires['offsets'].output()
    self.assertIsInstance(offsets, luigi.hdfs.HdfsTarget)
    self.assertEqual(offsets.format, luigi.hdfs.Plain)

    destination = task.output()
    self.assertIsInstance(destination, luigi.File)
def test_task_urls(self):
    """
    Check the target types EnrollmentsByWeek produces for URL parameters.

    `src` is given as a one-element list of s3:// URLs.  With an s3://
    `offsets` URL the required 'offsets' task must output a
    `luigi.hdfs.HdfsTarget` in `luigi.hdfs.Plain` format, and with a file://
    `destination` URL the task output must be a `luigi.File`.
    """
    # Plain decimal month: the old `01` spelling is an octal literal in
    # Python 2 and a SyntaxError in Python 3.
    date = datetime.date(2013, 1, 20)

    task = EnrollmentsByWeek(name='fake_name',
                             src=['s3://bucket/path/'],
                             offsets='s3://bucket/file.txt',
                             destination='file://path/file.txt',
                             date=date)

    requires = task.requires()

    # The source output is resolved but not asserted on; the call still
    # fails the test if it raises.
    requires['source'].output()

    offsets = requires['offsets'].output()
    self.assertIsInstance(offsets, luigi.hdfs.HdfsTarget)
    self.assertEqual(offsets.format, luigi.hdfs.Plain)

    destination = task.output()
    self.assertIsInstance(destination, luigi.File)
def requires(self):
    """
    Yield one tuple holding the four report tasks built from this task.

    Every task receives the same shared parameters.  `destination` is
    `self.destination`, `self.name` and the date joined by `url_path_join`.
    `manifest` is `self.manifest_path`, "executive-reports", `self.name` and
    the date joined the same way, or None when `self.manifest_path` is None.
    """
    run_date = str(self.date)
    report_destination = url_path_join(self.destination, self.name, run_date)

    manifest_location = (
        url_path_join(self.manifest_path, "executive-reports", self.name, run_date)
        if self.manifest_path is not None
        else None
    )

    # Keyword arguments common to all four report tasks.
    shared = dict(
        name=self.name,
        src=self.src,
        include=self.include,
        manifest=manifest_location,
        credentials=self.credentials,
        blacklist=self.blacklist,
        mapreduce_engine=self.mapreduce_engine,
        lib_jar=self.lib_jar,
        n_reduce_tasks=self.n_reduce_tasks,
        destination=report_destination,
        date=self.date,
    )

    all_users_and_enrollments = WeeklyAllUsersAndEnrollments(
        offsets=self.offsets,
        history=self.history,
        weeks=TOTAL_USERS_AND_ENROLLMENTS_NUM_WEEKS,
        **shared
    )
    incremental_users_and_enrollments = WeeklyIncrementalUsersAndEnrollments(
        offsets=self.offsets,
        history=self.history,
        weeks=WEEKLY_ENROLLMENT_REPORT_WEEKS,
        **shared
    )
    enrollments_by_week = EnrollmentsByWeek(
        offsets=self.offsets,
        statuses=self.statuses,
        weeks=WEEKLY_ENROLLMENT_REPORT_WEEKS,
        **shared
    )
    daily_registrations = DailyRegistrationsEnrollmentsAndCourses(
        days=DEFAULT_NUM_DAYS,
        **shared
    )

    # A single yield of the whole tuple, not one yield per task.
    yield (
        all_users_and_enrollments,
        incremental_users_and_enrollments,
        enrollments_by_week,
        daily_registrations,
    )
def run_task(self, source, date, weeks, offset=None, statuses=None):
    """
    Execute an EnrollmentsByWeek task against in-memory fake targets.

    The task is created with a one-element list as its `src`.

    Arguments:
        source: text for the 'source' input.  It is dedented, stripped and
            has every space replaced by a tab before being wrapped in a
            FakeTarget.
        date: the task date as a 'YYYY-MM-DD' string.
        weeks: passed straight through to the task's `weeks` parameter.
        offset: optional text for the 'offsets' input, reformatted like
            `source`.  When falsy the task is built with offsets=None and
            no 'offsets' input is mocked.
        statuses: optional text for the 'statuses' input, reformatted like
            `source`.  When falsy no 'statuses' input is mocked.

    Returns:
        the task output parsed into a pandas dataframe indexed by
        'course_id', with '-' cells read as missing values.
    """
    def tsv_target(text):
        """Wrap space-separated literal text in a FakeTarget as TSV."""
        return FakeTarget(textwrap.dedent(text).strip().replace(' ', '\t'))

    when = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # Offsets stay None on the task unless offset data was supplied.
    task = EnrollmentsByWeek(
        name='fake_name',
        src=['fake_source'],
        offsets=('fake_offsets' if offset else None),
        destination='fake_destination',
        date=when,
        weeks=weeks,
    )

    # Always mock 'source'; add 'offsets' / 'statuses' only when given.
    mocked_inputs = {'source': tsv_target(source)}
    optional_inputs = [('offsets', offset), ('statuses', statuses)]
    mocked_inputs.update(
        (key, tsv_target(text)) for key, text in optional_inputs if text
    )
    task.input = MagicMock(return_value=mocked_inputs)

    sink = FakeTarget()
    task.output = MagicMock(return_value=sink)

    # Execute, then parse what the task wrote into a dataframe.
    task.run()
    csv_text = sink.buffer.read()
    frame = pandas.read_csv(StringIO(csv_text),
                            na_values=['-'],
                            index_col='course_id')
    return frame