def test_workflow(self):
    """
    Run the users-per-country workflow end to end on a two-event log.

    Writes a dummy geolocation data file and a tracking log containing two
    events, runs UsersPerCountryReportWorkflow with the 'local' mapreduce
    engine while ``pygeoip.GeoIP`` is patched to return a FakeGeoLocation,
    and then checks that the report file holds a header line followed by
    one entry for each of the two fake countries (share 0.5, count 1).
    """
    # set up directories:
    src_path = os.path.join(self.temp_rootdir, "src")
    os.mkdir(src_path)
    counts_path = os.path.join(self.temp_rootdir, "counts")
    os.mkdir(counts_path)
    report_path = os.path.join(self.temp_rootdir, "report.csv")
    data_filepath = os.path.join(self.temp_rootdir, "geoloc.dat")
    with open(data_filepath, 'w') as data_file:
        data_file.write("Dummy geolocation data.")

    # create input:
    log_filepath = os.path.join(src_path, "tracking.log")
    with open(log_filepath, 'w') as log_file:
        log_file.write(self._create_event_log_line())
        log_file.write('\n')
        log_file.write(
            self._create_event_log_line(username="******", ip=FakeGeoLocation.ip_address_2))
        log_file.write('\n')

    end_date = '2014-04-01'
    task = UsersPerCountryReportWorkflow(
        mapreduce_engine='local',
        name='test',
        src=[src_path],
        end_date=datetime.datetime.strptime(end_date, '%Y-%m-%d').date(),
        geolocation_data=data_filepath,
        counts=counts_path,
        report=report_path,
    )

    worker = luigi.worker.Worker()
    try:
        worker.add(task)
        # The geolocation lookup is faked for the duration of the run.
        with patch(
                'edx.analytics.tasks.user_location.pygeoip') as mock_pygeoip:
            mock_pygeoip.GeoIP = Mock(return_value=FakeGeoLocation())
            worker.run()
    finally:
        # Always stop the worker, even when scheduling or running fails,
        # so a failing test does not leave it running.
        worker.stop()

    with open(report_path) as report_file:
        output_lines = report_file.readlines()

    self.assertEqual(len(output_lines), 3)
    self.assertEqual(output_lines[0].strip('\n'), UsersPerCountryReport.create_header(end_date))
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_1, FakeGeoLocation.country_code_1)
    self.assertEqual(output_lines[1].strip('\n'), expected)
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_2, FakeGeoLocation.country_code_2)
    self.assertEqual(output_lines[2].strip('\n'), expected)
def test_workflow(self):
    """
    Run the users-per-country workflow end to end on a two-event log.

    Writes a dummy geolocation data file and a tracking log containing two
    events, runs UsersPerCountryReportWorkflow with the 'local' mapreduce
    engine while ``pygeoip.GeoIP`` is patched to return a FakeGeoLocation,
    and then checks that the report file holds a header line followed by
    one entry for each of the two fake countries (share 0.5, count 1).
    """
    # set up directories:
    src_path = os.path.join(self.temp_rootdir, "src")
    os.mkdir(src_path)
    counts_path = os.path.join(self.temp_rootdir, "counts")
    os.mkdir(counts_path)
    report_path = os.path.join(self.temp_rootdir, "report.csv")
    data_filepath = os.path.join(self.temp_rootdir, "geoloc.dat")
    with open(data_filepath, 'w') as data_file:
        data_file.write("Dummy geolocation data.")

    # create input:
    log_filepath = os.path.join(src_path, "tracking.log")
    with open(log_filepath, 'w') as log_file:
        log_file.write(self._create_event_log_line())
        log_file.write('\n')
        log_file.write(self._create_event_log_line(username="******", ip=FakeGeoLocation.ip_address_2))
        log_file.write('\n')

    end_date = '2014-04-01'
    task = UsersPerCountryReportWorkflow(
        mapreduce_engine='local',
        name='test',
        src=[src_path],
        end_date=datetime.datetime.strptime(end_date, '%Y-%m-%d').date(),
        geolocation_data=data_filepath,
        counts=counts_path,
        report=report_path,
    )

    worker = luigi.worker.Worker()
    try:
        worker.add(task)
        # The geolocation lookup is faked for the duration of the run.
        with patch('edx.analytics.tasks.user_location.pygeoip') as mock_pygeoip:
            mock_pygeoip.GeoIP = Mock(return_value=FakeGeoLocation())
            worker.run()
    finally:
        # Always stop the worker, even when scheduling or running fails,
        # so a failing test does not leave it running.
        worker.stop()

    with open(report_path) as report_file:
        output_lines = report_file.readlines()

    self.assertEqual(len(output_lines), 3)
    self.assertEqual(output_lines[0].strip('\n'), UsersPerCountryReport.create_header(end_date))
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_1, FakeGeoLocation.country_code_1
    )
    self.assertEqual(output_lines[1].strip('\n'), expected)
    expected = UsersPerCountryReport.create_csv_entry(
        0.5, 1, FakeGeoLocation.country_name_2, FakeGeoLocation.country_code_2
    )
    self.assertEqual(output_lines[2].strip('\n'), expected)
def test_report(self):
    """
    Check the report produced from two per-country count rows.

    Feeds two rows (counts 34 and 43) through ``run_task`` and asserts that
    the output starts with the header for the date, followed by the entry
    for the larger count and then the smaller one, each with its share of
    the total of 77.
    """
    date = '2014-04-01'
    # Output counts in reverse order, to confirm that sorting works.
    counts = """
            Country_1 Code_1 34 {date}
            Country_2 Code_2 43 {date}
            """.format(date=date)
    output = self.run_task(counts)
    output_lines = output.split('\n')
    self.assertEqual(output_lines[0], UsersPerCountryReport.create_header(date))
    self.assertEqual(
        output_lines[1], UsersPerCountryReport.create_csv_entry(float(43) / 77, 43, "Country_2", "Code_2")
    )
    self.assertEqual(
        output_lines[2], UsersPerCountryReport.create_csv_entry(float(34) / 77, 34, "Country_1", "Code_1")
    )
    # Also confirm the formatting of both data rows (indices 1 and 2);
    # a [1:2] slice would only look at the first of them.
    for line in output_lines[1:3]:
        self.assertTrue(line.startswith('0.'))
def run_task(self, counts):
    """
    Execute a UsersPerCountryReport against in-memory fake targets.

    The `counts` text is dedented, stripped, and has every space replaced
    with a tab before it is handed to the task as its input.

    Returns:
        whatever is read back from the fake output target's buffer.
    """
    report_task = UsersPerCountryReport(counts='fake_counts', report='fake_report')

    # Make the indented, space-separated literal look like a TSV.
    dedented = textwrap.dedent(counts)
    tsv_text = dedented.strip().replace(' ', '\t')

    source = FakeTarget(tsv_text)
    report_task.input = MagicMock(return_value=source)

    sink = FakeTarget()
    report_task.output = MagicMock(return_value=sink)

    report_task.run()
    return sink.buffer.read()
def test_report(self):
    """
    Check the report produced from two per-country count rows.

    Feeds two rows (counts 34 and 43) through ``run_task`` and asserts that
    the output starts with the header for the date, followed by the entry
    for the larger count and then the smaller one, each with its share of
    the total of 77.
    """
    date = '2014-04-01'
    # Output counts in reverse order, to confirm that sorting works.
    counts = """
            Country_1 Code_1 34 {date}
            Country_2 Code_2 43 {date}
            """.format(date=date)
    output = self.run_task(counts)
    output_lines = output.split('\n')
    self.assertEqual(output_lines[0], UsersPerCountryReport.create_header(date))
    self.assertEqual(
        output_lines[1],
        UsersPerCountryReport.create_csv_entry(
            float(43) / 77, 43, "Country_2", "Code_2"))
    self.assertEqual(
        output_lines[2],
        UsersPerCountryReport.create_csv_entry(
            float(34) / 77, 34, "Country_1", "Code_1"))
    # Also confirm the formatting of both data rows (indices 1 and 2);
    # a [1:2] slice would only look at the first of them.
    for line in output_lines[1:3]:
        self.assertTrue(line.startswith('0.'))
def run_task(self, counts):
    """
    Execute a UsersPerCountryReport against in-memory fake targets.

    The `counts` text is dedented, stripped, and has every space replaced
    with a tab before it is handed to the task as its input.

    Returns:
        whatever is read back from the fake output target's buffer.
    """
    report_task = UsersPerCountryReport(counts='fake_counts', report='fake_report')

    # Make the indented, space-separated literal look like a TSV.
    dedented = textwrap.dedent(counts)
    tsv_text = dedented.strip().replace(' ', '\t')

    source = FakeTarget(value=tsv_text)
    report_task.input = MagicMock(return_value=source)

    sink = FakeTarget()
    report_task.output = MagicMock(return_value=sink)

    report_task.run()
    return sink.buffer.read()