def create_task(self, credentials=None, source=None, overwrite=False, cls=CopyToVerticaDummyTable):
    """
    Emulate execution of a generic VerticaCopyTask.

    Args:
        credentials: JSON text served by the fake ``credentials`` input; a
            canned document is substituted when this is falsy.
        source: text served by the fake ``insert_source`` input; falls back
            to ``self._get_source_string(1)`` when falsy.
        overwrite: forwarded unchanged to the task constructor.
        cls: the task class to instantiate.

    Returns:
        The new task, with ``input`` and ``output`` replaced by mocks.
    """
    # Clear luigi's instance cache up front, otherwise a previously built
    # task object could be handed back instead of a new one.
    luigi.task.Register.clear_instance_cache()
    task = cls(credentials=sentinel.ignored, overwrite=overwrite)

    credentials_text = credentials or '''\
        {
            "host": "db.example.com",
            "port": 5433,
            "user": "******",
            "password": "******"
        }'''
    source_text = source or self._get_source_string(1)

    task.input = MagicMock(return_value={
        'credentials': FakeTarget(textwrap.dedent(credentials_text)),
        'insert_source': FakeTarget(textwrap.dedent(source_text)),
    })
    # The task's "output" is the mocked Vertica connector held by the test case.
    task.output = MagicMock(return_value=self.mock_vertica_connector)
    return task
def create_task(self, credentials=None, source=None, insert_chunk_size=100, overwrite=False,
                cls=InsertToMysqlDummyTable):
    """
    Emulate execution of a generic MysqlTask.

    Args:
        credentials: JSON text served by the fake ``credentials`` input; a
            canned document is substituted when this is falsy.
        source: text served by the fake ``insert_source`` input; falls back
            to ``self._get_source_string(1)`` when falsy.
        insert_chunk_size: forwarded unchanged to the task constructor.
        overwrite: forwarded unchanged to the task constructor.
        cls: the task class to instantiate.

    Returns:
        The new task, with ``input`` replaced by a mock.
    """
    # Clear luigi's instance cache up front, otherwise a previously built
    # task object could be handed back instead of a new one.
    luigi.task.Register.clear_instance_cache()
    task = cls(
        credentials=sentinel.ignored,
        insert_chunk_size=insert_chunk_size,
        overwrite=overwrite
    )

    credentials_text = credentials or '''\
        {
            "host": "db.example.com",
            "port": "3306",
            "username": "******",
            "password": "******"
        }'''
    source_text = source or self._get_source_string(1)

    task.input = MagicMock(return_value={
        'credentials': FakeTarget(value=textwrap.dedent(credentials_text)),
        'insert_source': FakeTarget(value=textwrap.dedent(source_text)),
    })
    return task
def run_task(self, registrations, enrollments, date, weeks, blacklist=None):
    """
    Run task with fake targets.

    Args:
        registrations: space-separated registration rows, or None for a default fixture.
        enrollments: space-separated enrollment rows, or None for a default fixture.
        date: the task date as a ``YYYY-MM-DD`` string.
        weeks: forwarded unchanged to the task constructor.
        blacklist: optional blacklist text; only mocked as an input when truthy.

    Returns:
        the task output as a pandas dataframe.
    """
    task = WeeklyIncrementalUsersAndEnrollments(
        name="fake_name",
        destination='fake_destination',
        date=datetime.datetime.strptime(date, '%Y-%m-%d').date(),
        weeks=weeks,
        blacklist=blacklist
    )

    # Fall back to canned fixtures for inputs the caller left out.
    if registrations is None:
        registrations = """
            2013-01-01 10
            2013-01-10 20
            """
    if enrollments is None:
        enrollments = """
            course_1 2013-01-06 10
            course_1 2013-01-14 10
            """

    def to_tsv(text):
        """Dedent and strip the text, then turn every space into a tab."""
        return textwrap.dedent(text).strip().replace(' ', '\t')

    fake_inputs = {
        'enrollments': FakeTarget(value=to_tsv(enrollments)),
        'registrations': FakeTarget(value=to_tsv(registrations)),
    }
    # A blacklist input exists only when the caller supplied one.
    if blacklist:
        fake_inputs['blacklist'] = FakeTarget(value=to_tsv(blacklist))
    task.input = MagicMock(return_value=fake_inputs)

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    # Execute, then load what the task wrote into a dataframe.
    task.run()
    written = fake_output.buffer.read()
    return pandas.read_csv(StringIO(written),
                           na_values=['-'],
                           index_col=self.row_label('header'))
def run_task(self, credentials=None, query=None):
    """
    Emulate execution of a generic MysqlSelectTask.

    Args:
        credentials: JSON text served by the fake ``credentials`` input; a
            canned document is substituted when this is falsy.
        query: the SQL exposed by the task's ``query`` property; ``'SELECT 1'``
            when falsy.

    Returns:
        The task output parsed with ``read_csv`` (tab separated, no header),
        or None when parsing raises ValueError.
    """
    credentials_text = credentials or '''\
        {
            "host": "db.example.com",
            "port": "3306",
            "username": "******",
            "password": "******"
        }'''
    sql = query or 'SELECT 1'

    # Throwaway task subclass that just hands back the values chosen above.
    class TestTask(MysqlSelectTask):
        """A generic MysqlSelectTask that wraps the parameters from the enclosing function"""

        database = "exampledata"

        @property
        def query(self):
            return sql

        @property
        def filename(self):
            return None  # pragma: no cover

    task = TestTask(
        credentials=sentinel.ignored,
        destination=sentinel.ignored
    )
    task.input = MagicMock(return_value={
        'credentials': FakeTarget(value=textwrap.dedent(credentials_text))
    })

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    task.run()

    try:
        return read_csv(fake_output.buffer,
                        header=None,
                        sep="\t",
                        na_values=['-'],
                        encoding='utf-8')
    except ValueError:
        return None
def run_task(self, source, date, weeks, offset=None, statuses=None):
    """
    Run task with fake targets.

    Args:
        source: space-separated source rows.
        date: the task date as a ``YYYY-MM-DD`` string.
        weeks: forwarded unchanged to the task constructor.
        offset: optional offsets text; only mocked as an input when truthy.
        statuses: optional statuses text; only mocked as an input when truthy.

    Returns:
        the task output as a pandas dataframe.
    """
    # The task's ``offsets`` parameter stays None unless an offset was given.
    offsets_param = 'fake_offsets' if offset else None
    task = EnrollmentsByWeek(name='fake_name',
                             src=['fake_source'],
                             offsets=offsets_param,
                             destination='fake_destination',
                             date=datetime.datetime.strptime(date, '%Y-%m-%d').date(),
                             weeks=weeks)

    def to_tsv(text):
        """Dedent and strip the text, then turn every space into a tab."""
        return textwrap.dedent(text).strip().replace(' ', '\t')

    fake_inputs = {
        'source': FakeTarget(to_tsv(source)),
    }
    # Optional inputs are present only when the caller supplied them.
    if offset:
        fake_inputs['offsets'] = FakeTarget(to_tsv(offset))
    if statuses:
        fake_inputs['statuses'] = FakeTarget(to_tsv(statuses))
    task.input = MagicMock(return_value=fake_inputs)

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    # Execute, then load what the task wrote into a dataframe.
    task.run()
    written = fake_output.buffer.read()
    return pandas.read_csv(StringIO(written),
                           na_values=['-'],
                           index_col='course_id')
def run_task(self, source):
    """
    Helper utility for running task under test.

    Writes ``source`` (UTF-8 encoded) to ``catalog_test.json``, runs
    DailyProcessFromCatalogSubjectTask with that file as its mocked input and
    a FakeTarget as its mocked output.

    Args:
        source: the text to place in the input file.

    Returns:
        The task output parsed as a tab-separated table with the columns
        course_id, date, subject_uri, subject_title and subject_language.
    """
    self.input_file = "catalog_test.json"
    # Binary mode: what gets written is already-encoded bytes, which a
    # text-mode handle rejects on Python 3.
    with open(self.input_file, 'wb') as fle:
        fle.write(source.encode('utf-8'))

    fake_warehouse_path = self.input_dir

    # Plain decimal month: a leading-zero literal such as 06 is a syntax error on Python 3.
    task = DailyProcessFromCatalogSubjectTask(warehouse_path=fake_warehouse_path,
                                              date=datetime.date(2015, 6, 25),
                                              catalog_path='')

    output_target = FakeTarget()
    task.output = MagicMock(return_value=output_target)

    class DummyInput(object):
        """A dummy input object to imitate the input to a luigi task."""

        def __init__(self, filename):
            self.filename = filename

        def open(self, mode):
            """Opens the file this object is mocking a past task as having output."""
            return open(self.filename, mode)

    input_dummy = DummyInput(self.input_file)
    task.input = MagicMock(return_value=input_dummy)

    task.run()

    results = pandas.read_table(output_target.buffer, sep='\t', header=None,
                                names=['course_id', 'date', 'subject_uri',
                                       'subject_title', 'subject_language'])
    return results
def create_task(self):
    """
    Build ``self.task`` from ``self.task_class`` with a mocked explicit-event whitelist.

    The whitelist text is passed through ``self.reformat`` and served as the
    ``explicit_events`` local input before ``init_local`` is called. User info
    requirements are replaced with the mock ones afterwards.
    """
    whitelist_text = """
        admin browser edx.instructor.report.downloaded
        admin server add-forum-admin
        admin server add-forum-community-TA
        admin server add-forum-mod
        admin server add-instructor
        admin server list-staff
        enrollment server edx.course.enrollment.activated
        # problem server problem_rescore
    """

    self.task = self.task_class(
        output_root='/fake/output',
        auth_user_path='/fake/input1',
        auth_userprofile_path='/fake/input2',
        explicit_event_whitelist='explicit_events.tsv',
    )

    whitelist_target = FakeTarget(value=self.reformat(whitelist_text))
    self.task.input_local = MagicMock(return_value={'explicit_events': whitelist_target})
    self.task.init_local()

    self.task.user_info_requirements = get_mock_user_info_requirements()
def run_task(self, source):
    """
    Helper utility for running task under test.

    Args:
        source: space-separated rows; they are converted to TSV and written
            to ``self.input_file`` before the task runs.

    Returns:
        The task output as a pandas dataframe with ``date`` and ``count`` columns.
    """
    # Dedent and strip the rows, then swap every space for a tab to get TSV.
    tsv_text = textwrap.dedent(source).strip().replace(' ', '\t')
    with open(self.input_file, 'w') as counts_file:
        counts_file.write(tsv_text)

    task = TotalEventsReport(counts=self.input_file, report='fake_report')

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    task.run()

    written = fake_output.buffer.read()
    return pandas.read_csv(StringIO(written),
                           na_values=['-'],
                           index_col=False,
                           header=None,
                           names=['date', 'count'])
def run_task(self, source):
    """
    Helper utility for running task under test.

    Writes ``source`` (UTF-8 encoded) to ``course_structure.json``, runs
    ProcessCourseStructureAPIData with that file as its mocked input and a
    FakeTarget as its mocked output.

    Args:
        source: the text to place in the input file.

    Returns:
        The task output parsed as a tab-separated table with the columns
        course_id, course_org_id, course_number, course_run, course_start,
        course_end and course_name.
    """
    self.input_file = "course_structure.json"
    # Binary mode: what gets written is already-encoded bytes, which a
    # text-mode handle rejects on Python 3.
    with open(self.input_file, 'wb') as fle:
        fle.write(source.encode('utf-8'))

    fake_warehouse_path = self.input_dir

    task = ProcessCourseStructureAPIData(warehouse_path=fake_warehouse_path,
                                         run_date=datetime.date(2015, 8, 25))

    output_target = FakeTarget()
    task.output = MagicMock(return_value=output_target)

    class DummyInput(object):
        """A dummy input object to imitate the input to a luigi task."""

        def __init__(self, filename):
            self.filename = filename

        def open(self, mode):
            """Opens the file this object is mocking a past task as having output."""
            return open(self.filename, mode)

    input_dummy = DummyInput(self.input_file)
    task.input = MagicMock(return_value=input_dummy)

    task.run()

    results = pandas.read_table(output_target.buffer, sep='\t', header=None,
                                names=['course_id', 'course_org_id', 'course_number',
                                       'course_run', 'course_start', 'course_end',
                                       'course_name'])
    return results
def setUp(self):
    """Create the PayPal transactions task for ``DEFAULT_DATE`` and mock its output with a FakeTarget."""
    run_date = luigi.DateParameter().parse(self.DEFAULT_DATE)
    self.task = PaypalTransactionsByDayTask(
        date=run_date,
        output_root='/fake/output',
        account_id='testing'
    )

    # Keep a handle on the fake target so tests can inspect what was written.
    fake_output = FakeTarget()
    self.output_target = fake_output
    self.task.output = MagicMock(return_value=fake_output)
def initialize_task(self, metric_ranges):
    """
    Given a list of metric ranges, setup the task by calling init_local.

    Each range is serialized with ``to_separated_values`` and the results are
    joined one per line into the fake ``range_data`` local input.
    """
    serialized_rows = (metric_range.to_separated_values() for metric_range in metric_ranges)
    range_target = FakeTarget(value='\n'.join(serialized_rows))

    self.task.input_local = MagicMock(return_value={'range_data': range_target})
    self.task.init_local()
def test_manifest_file_construction(self, get_target_from_url_mock):
    """
    Check that running the task writes ``SOURCE_URL`` plus a newline.

    Args:
        get_target_from_url_mock: mock whose return value is replaced with a
            FakeTarget, so whatever the task writes through it can be read back.
    """
    fake_target = FakeTarget()
    get_target_from_url_mock.return_value = fake_target

    self.task.run()

    content = fake_target.buffer.read()
    # assertEqual rather than the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(content, self.SOURCE_URL + '\n')
def run_task(self, task, credentials=None):
    """
    Emulate execution of a Sqoop import from Mysql.

    Args:
        task: the task instance to run; its ``input`` and ``metadata_output``
            are replaced by mocks first.
        credentials: JSON text served by the fake ``credentials`` input; a
            canned document is substituted when this is falsy.
    """
    credentials_text = credentials or '''\
        {
            "host": "db.example.com",
            "port": "3306",
            "username": "******",
            "password": "******"
        }'''

    credentials_target = FakeTarget(value=textwrap.dedent(credentials_text))
    task.input = MagicMock(return_value={'credentials': credentials_target})
    task.metadata_output = MagicMock(return_value=FakeTarget())

    task.run()
def run_task(self, task_cls, source):
    """
    Runs the task with fake targets.

    Args:
        task_cls: the task class to instantiate; every constructor argument
            is a placeholder sentinel.
        source: text served as the single fake ``data`` input.

    Returns:
        Whatever the task wrote to its fake output, read from the buffer.
    """
    placeholder = sentinel.ignored
    task = task_cls(
        course=placeholder,
        output_directory=placeholder,
        data_directory=placeholder,
        auth_user_path=placeholder,
        auth_userprofile_path=placeholder,
    )

    task.input = MagicMock(return_value={'data': [FakeTarget(value=source)]})

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    task.user_info_requirements = get_mock_user_info_requirements()
    reset_user_info_for_testing()

    task.run()
    return fake_output.buffer.read()
def run_task(self, counts):
    """
    Run task with fake targets.

    Args:
        counts: space-separated rows, converted to TSV and served as the
            task's single fake input.

    Returns:
        the task output as a string.
    """
    task = UsersPerCountryReport(counts='fake_counts', report='fake_report')

    # Dedent and strip the rows, then swap every space for a tab to get TSV.
    counts_tsv = textwrap.dedent(counts).strip().replace(' ', '\t')
    task.input = MagicMock(return_value=FakeTarget(value=counts_tsv))

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    task.run()
    return fake_output.buffer.read()
def _create_export_task(self, **kwargs):
    """
    Build an EventExportTask wired to ``test://`` locations.

    Args:
        **kwargs: additional keyword arguments forwarded to the task constructor.

    Returns:
        The task, with ``input_local`` mocked to serve ``self.CONFIGURATION``.
    """
    task = EventExportTask(
        mapreduce_engine='local',
        output_root='test://output/',
        config='test://config/default.yaml',
        source=['test://input/'],
        environment='prod',
        interval=Year.parse('2014'),
        gpg_key_dir='test://config/gpg-keys/',
        gpg_master_key='*****@*****.**',
        **kwargs
    )

    config_target = FakeTarget(self.CONFIGURATION)
    task.input_local = MagicMock(return_value=config_target)
    return task
def setUp(self):
    """
    Prepare an EventTypeDistributionTask with a mocked events list, plus the
    event template and expected key used by the tests.
    """
    self.task_class = EventTypeDistributionTask
    self.events_list = """
        admin browser edx.instructor.report.downloaded
        admin server add-forum-admin
        admin server add-forum-community-TA
        admin server add-forum-mod
        admin server add-instructor
    """
    events_target = FakeTarget(value=self.reformat(self.events_list))

    super(EventTypeDistributionTaskMapTest, self).setUp()

    # Serve the events list as the task's local input before initializing it.
    self.task.events_list_file_path = "fake_path"
    self.task.input_local = MagicMock(return_value=events_target)
    self.task.init_local()

    self.event_date = '2013-12-17'
    self.event_type = "test_event"
    self.event_source = "browser"
    self.event_category = "unknown"
    self.exported = False

    event_context = {
        "course_id": "course_id",
        "org_id": "org_id",
        "user_id": "user_id",
    }
    event_payload = {
        "course_id": "course_id",
        "user_id": "user_id",
        "mode": "honor",
    }
    self.event_templates = {
        'event': {
            "username": "******",
            "host": "test_host",
            "event_source": self.event_source,
            "event_type": self.event_type,
            "context": event_context,
            "time": self.event_date,
            "ip": "127.0.0.1",
            "event": event_payload,
        }
    }
    self.default_event_template = 'event'

    self.expected_key = (
        self.event_date,
        self.event_category,
        self.event_type,
        self.event_source,
        self.exported,
    )
def generate_table(self, interval):
    """
    Generate a calendar table containing every date in the provided interval.

    Args:
        interval: forwarded unchanged to the calendar task constructor.

    Returns:
        A list with one entry per output line, each entry being that line's
        tab-separated fields.
    """
    fake_output = FakeTarget()

    # CalendarTask subclass whose output is redirected to the fake target.
    class TestCalendarTask(CalendarTask):
        output_root = None

        def output(self):
            return fake_output

    calendar_task = TestCalendarTask(interval=interval)
    calendar_task.run()

    written_lines = fake_output.buffer.getvalue().splitlines()
    return [row.split('\t') for row in written_lines]
def run_task(self):
    """
    Runs the task with fake targets.

    ``self.archive_root`` is packed into a temporary gzipped tarball that is
    served as the task's ``data`` input. The archive the task writes is then
    unpacked into a temporary directory (removed during test cleanup), and
    ``self.output_course_root`` is set to the course directory inside it.
    """
    extraction_root = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, extraction_root)

    with tempfile.NamedTemporaryFile() as archive_handle:
        # Build the input tarball in the named temp file, then rewind it.
        with tarfile.open(mode='w:gz', fileobj=archive_handle) as tarball:
            tarball.add(self.archive_root, arcname='')
        archive_handle.seek(0)

        placeholder = sentinel.ignored
        task = obfuscate.CourseContentTask(
            course=placeholder,
            output_directory=placeholder,
            data_directory=placeholder,
            auth_user_path=placeholder,
            auth_userprofile_path=placeholder,
        )

        # The task reads the tarball by path, so it has to run while the temp file still exists.
        task.input = MagicMock(return_value={'data': [LocalTarget(path=archive_handle.name)]})

        fake_output = FakeTarget()
        task.output = MagicMock(return_value=fake_output)
        task.user_info_requirements = get_mock_user_info_requirements()
        reset_user_info_for_testing()
        task.run()

        with tarfile.open(mode='r:gz', fileobj=fake_output.buffer) as result_archive:
            result_archive.extractall(extraction_root)

    self.output_course_root = os.path.join(
        extraction_root,
        get_filename_safe_course_id(self.COURSE_ID)
    )
def run_task(self, registrations, enrollments, date, weeks, offset=None, history=None, blacklist=None):
    """
    Run task with fake targets.

    Args:
        registrations: space-separated registration rows.
        enrollments: space-separated enrollment rows, or None for a default fixture.
        date: the task date as a ``YYYY-MM-DD`` string.
        weeks: forwarded unchanged to the task constructor.
        offset: optional offsets text; only mocked as an input when truthy.
        history: optional history text; only mocked as an input when truthy.
        blacklist: optional blacklist text; only mocked as an input when truthy.

    Returns:
        the task output as a pandas dataframe.
    """
    # The ``offsets`` and ``history`` parameters stay None unless data for them was given.
    task = WeeklyAllUsersAndEnrollments(
        name='fake_name',
        n_reduce_tasks="fake_n_reduce_tasks",
        offsets='fake_offsets' if offset else None,
        history='fake_history' if history else None,
        destination='fake_destination',
        date=datetime.datetime.strptime(date, '%Y-%m-%d').date(),
        weeks=weeks,
        credentials=None,
        blacklist=blacklist
    )

    def to_tsv(text):
        """Dedent and strip the text, then turn every space into a tab."""
        return textwrap.dedent(text).strip().replace(' ', '\t')

    if enrollments is None:
        enrollments = """
            course_1 2013-03-01 1
            course_1 2013-03-30 2
            course_2 2013-03-07 1
            course_2 2013-03-08 1
            course_2 2013-03-10 1
            course_2 2013-03-13 1
            course_3 2013-03-15 1
            course_3 2013-03-18 1
            course_3 2013-03-19 1
            """

    fake_inputs = {
        'enrollments': FakeTarget(value=to_tsv(enrollments)),
        'registrations': FakeTarget(value=to_tsv(registrations))
    }
    # Optional inputs are present only when the caller supplied them.
    if offset:
        fake_inputs['offsets'] = FakeTarget(value=to_tsv(offset))
    if history:
        fake_inputs['history'] = FakeTarget(value=to_tsv(history))
    if blacklist:
        fake_inputs['blacklist'] = FakeTarget(value=to_tsv(blacklist))
    task.input = MagicMock(return_value=fake_inputs)

    fake_output = FakeTarget()
    task.output = MagicMock(return_value=fake_output)

    # Execute, then load what the task wrote into a dataframe.
    task.run()
    written = fake_output.buffer.read()
    return pandas.read_csv(StringIO(written),
                           na_values=['-'],
                           index_col=self.row_label('header'))