def setUp(self):
    """Create the fixtures shared by the tests.

    Sets the course/user identifiers and the reduce key, three dates one
    week apart, an all-zero summary record for the latest week, a "high"
    range record for ``problems_attempted`` and the task under test.
    """
    one_week = datetime.timedelta(weeks=1)
    interval_end = datetime.date(2014, 4, 1)
    interval_start = interval_end - one_week  # 2014-03-25

    self.course_id = 'foo/bar/baz'
    self.username = '******'
    self.prev_week_start_date = interval_start - one_week  # 2014-03-18
    self.start_date = interval_start
    self.date = interval_end
    self.reduce_key = (self.course_id, self.username)

    # Every metric starts at zero; individual tests override fields via replace().
    self.input_record = ModuleEngagementSummaryRecord(
        course_id=self.course_id,
        username=self.username,
        start_date=interval_start,
        end_date=interval_end,
        problem_attempts=0,
        problems_attempted=0,
        problems_completed=0,
        problem_attempts_per_completed=0.0,
        videos_viewed=0,
        discussion_contributions=0,
        days_active=0,
    )

    self.range_record = ModuleEngagementSummaryMetricRangeRecord(
        course_id=self.course_id,
        start_date=interval_start,
        end_date=interval_end,
        metric='problems_attempted',
        range_type='high',
        low_value=5.0,
        high_value=10.0,
    )

    self.task = self.task_class(  # pylint: disable=not-callable
        date=interval_end,
        output_root=self.DEFAULT_ARGS['output_root'],
        overwrite_from_date=datetime.date(2014, 4, 1),
    )
def setUp(self):
    """Prepare identifiers, dates, template records and the task for each test."""
    self.course_id, self.username = 'foo/bar/baz', '******'
    self.reduce_key = (self.course_id, self.username)

    # Three consecutive week boundaries.
    self.prev_week_start_date = datetime.date(2014, 3, 18)
    self.start_date = datetime.date(2014, 3, 25)
    self.date = datetime.date(2014, 4, 1)

    # Fields common to both template records.
    interval = {
        'course_id': self.course_id,
        'start_date': self.start_date,
        'end_date': self.date,
    }

    # Baseline summary with no activity at all; tests tweak it with replace().
    zero_metrics = {
        'problem_attempts': 0,
        'problems_attempted': 0,
        'problems_completed': 0,
        'problem_attempts_per_completed': 0.0,
        'videos_viewed': 0,
        'discussion_contributions': 0,
        'days_active': 0,
    }
    self.input_record = ModuleEngagementSummaryRecord(
        username=self.username,
        **dict(interval, **zero_metrics)
    )

    # Template "high" range covering [5.0, 10.0) style bounds for problems_attempted.
    self.range_record = ModuleEngagementSummaryMetricRangeRecord(
        metric='problems_attempted',
        range_type='high',
        low_value=5.0,
        high_value=10.0,
        **interval
    )

    task_args = {
        'date': self.date,
        'output_root': self.DEFAULT_ARGS['output_root'],
        'overwrite_from_date': datetime.date(2014, 4, 1),
    }
    self.task = self.task_class(**task_args)  # pylint: disable=not-callable
class ModuleEngagementUserSegmentDataTaskReducerTest(ReducerTestMixin, TestCase):
    """Reducer tests for ModuleEngagementUserSegmentDataTask.

    Each test loads a set of metric ranges into the task through
    ``init_local`` and then feeds summary records to the reducer, checking
    which segments are (or are not) emitted for the user.

    NOTE(review): the ``@data``-parametrized tests below only receive their
    ``metric`` argument if this class is decorated with ``ddt.ddt``; the
    decorator is not visible in this excerpt -- confirm it is applied.
    """

    task_class = ModuleEngagementUserSegmentDataTask
    output_record_type = ModuleEngagementUserSegmentRecord

    def setUp(self):
        """Build the shared fixtures: identifiers, dates, template records and the task."""
        self.course_id = 'foo/bar/baz'
        self.username = '******'
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)
        self.reduce_key = (self.course_id, self.username)
        # All-zero summary for the latest week; tests override fields via replace().
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=self.course_id,
            username=self.username,
            start_date=self.start_date,
            end_date=self.date,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(
            course_id=self.course_id,
            start_date=self.start_date,
            end_date=self.date,
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0
        )
        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=self.DEFAULT_ARGS['output_root'],
            overwrite_from_date=datetime.date(2014, 4, 1),
        )

    def initialize_task(self, metric_ranges):
        """Given a list of metric ranges, setup the task by calling init_local"""
        metric_ranges_text = '\n'.join(r.to_separated_values() for r in metric_ranges)
        # Stand in for the task's local input so init_local reads our serialized ranges.
        self.task.input_local = MagicMock(return_value={
            'range_data': FakeTarget(value=metric_ranges_text)
        })
        self.task.init_local()

    def _emitted_segments(self, output):
        """Return the ``segment`` field of every record tuple in the reducer output."""
        return [
            self.output_record_type.from_string_tuple(record_tuple).segment
            for record_tuple in output
        ]

    def assert_in_segment(self, output, segment):
        """Assert that the user was put into the provided segment."""
        # Previously this returned True/False without asserting, so callers could never fail.
        self.assertIn(segment, self._emitted_segments(output))

    def assert_not_in_segment(self, output, segment):
        """Assert that the user was not put into the provided segment."""
        self.assertNotIn(segment, self._emitted_segments(output))

    def test_init_local(self):
        other_course_record = self.range_record.replace(
            course_id='another/course/id',
            metric='problems_completed'
        )
        self.initialize_task([
            self.range_record,
            self.range_record.replace(
                range_type='low',
                low_value=0.0,
                high_value=3.0
            ),
            other_course_record
        ])

        # Only the "high" ranges are expected, keyed by course and then metric.
        self.assertEqual(dict(self.task.high_metric_ranges), {
            self.course_id: {
                'problems_attempted': self.range_record
            },
            'another/course/id': {
                'problems_completed': other_course_record
            }
        })

    def test_init_local_empty_input(self):
        self.initialize_task([])
        self.assertEqual(dict(self.task.high_metric_ranges), {})

    def test_output_format(self):
        self.initialize_task([
            self.range_record,
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=8.0,
                high_value=10.1
            )
        ])
        # Expected identifiers come from the fixtures so they cannot drift from setUp.
        self._check_output_complete_tuple(
            [
                self.input_record.replace(
                    problems_attempted=6,
                    problem_attempts_per_completed=9
                ).to_separated_values()
            ],
            (
                (
                    self.course_id,
                    self.username,
                    '2014-03-25',
                    '2014-04-01',
                    'highly_engaged',
                    'problems_attempted'
                ),
                (
                    self.course_id,
                    self.username,
                    '2014-03-25',
                    '2014-04-01',
                    'struggling',
                    'problem_attempts_per_completed'
                ),
            )
        )

    @data(
        'problems_attempted',
        'problems_completed',
        'videos_viewed',
        'discussion_contributions'
    )
    def test_highly_engaged(self, metric):
        self.initialize_task([
            self.range_record.replace(metric=metric)
        ])
        self._check_output_by_record_field(
            [
                self.input_record.replace(**{metric: 8}).to_separated_values()
            ],
            {
                'segment': 'highly_engaged'
            }
        )

    @data(
        'problem_attempts',
        'problem_attempts_per_completed',
    )
    def test_not_highly_engaged(self, metric):
        self.initialize_task([
            self.range_record.replace(metric=metric)
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(**{metric: 8}).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_too_low(self):
        self.initialize_task([
            self.range_record.replace(metric='problems_completed')
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(problems_completed=0).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_bottom(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed',
                low_value=6.0
            )
        ])
        # A value equal to the lower bound is expected to be inside the range.
        output = self._get_reducer_output(
            [
                self.input_record.replace(problems_completed=6).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_top(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed',
                high_value=9.0
            )
        ])
        # A value equal to the upper bound is expected to be outside the range.
        output = self._get_reducer_output(
            [
                self.input_record.replace(problems_completed=9).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_disengaging(self):
        self.initialize_task([])
        # The only activity is in the week before the current interval.
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    start_date=self.prev_week_start_date,
                    end_date=self.start_date,
                    days_active=1,
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'disengaging')

    def test_not_disengaging_only_recent(self):
        self.initialize_task([])
        output = self._get_reducer_output(
            [
                self.input_record.replace(days_active=1).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'disengaging')

    def test_struggling(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=8.0
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_low_high_value(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf'),
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=float('inf')
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=10.0
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high_value(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=float('inf')
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_not_struggling(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=3.0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'struggling')

    def test_not_struggling_infinite_low(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf')
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=100000.0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'struggling')
class ModuleEngagementUserSegmentDataTaskReducerTest(ReducerTestMixin, TestCase):
    """Tests for the reducer of ModuleEngagementUserSegmentDataTask.

    Tests serialize metric ranges into the task with ``init_local``, run
    summary records through the reducer and assert on the segments found in
    the output records.

    NOTE(review): ``@data`` only parametrizes a test when the class carries
    the ``ddt.ddt`` decorator, which is not visible in this excerpt --
    confirm it is applied.
    """

    task_class = ModuleEngagementUserSegmentDataTask
    output_record_type = ModuleEngagementUserSegmentRecord

    def setUp(self):
        """Create identifiers, dates, the template records and the task under test."""
        self.course_id = 'foo/bar/baz'
        self.username = '******'
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)
        self.reduce_key = (self.course_id, self.username)
        # Baseline record with every metric at zero; tests adjust it with replace().
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=self.course_id,
            username=self.username,
            start_date=self.start_date,
            end_date=self.date,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(
            course_id=self.course_id,
            start_date=self.start_date,
            end_date=self.date,
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0)
        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=self.DEFAULT_ARGS['output_root'],
            overwrite_from_date=datetime.date(2014, 4, 1),
        )

    def initialize_task(self, metric_ranges):
        """Given a list of metric ranges, setup the task by calling init_local"""
        metric_ranges_text = '\n'.join(
            r.to_separated_values() for r in metric_ranges)
        # Replace the task's local input so init_local reads the serialized ranges.
        self.task.input_local = MagicMock(
            return_value={'range_data': FakeTarget(value=metric_ranges_text)})
        self.task.init_local()

    def _segments_in(self, output):
        """Collect the ``segment`` value of each record tuple in the reducer output."""
        parse = self.output_record_type.from_string_tuple
        return [parse(record_tuple).segment for record_tuple in output]

    def assert_in_segment(self, output, segment):
        """Assert that the user was put into the provided segment."""
        # The old implementation only returned a boolean, so it never failed a test.
        self.assertIn(segment, self._segments_in(output))

    def assert_not_in_segment(self, output, segment):
        """Assert that the user was not put into the provided segment."""
        self.assertNotIn(segment, self._segments_in(output))

    def test_init_local(self):
        other_course_record = self.range_record.replace(
            course_id='another/course/id', metric='problems_completed')
        self.initialize_task([
            self.range_record,
            self.range_record.replace(range_type='low',
                                      low_value=0.0,
                                      high_value=3.0),
            other_course_record
        ])

        # The "low" range is expected to be dropped; the rest is keyed by course, then metric.
        self.assertEqual(
            dict(self.task.high_metric_ranges), {
                self.course_id: {
                    'problems_attempted': self.range_record
                },
                'another/course/id': {
                    'problems_completed': other_course_record
                }
            })

    def test_init_local_empty_input(self):
        self.initialize_task([])
        self.assertEqual(dict(self.task.high_metric_ranges), {})

    def test_output_format(self):
        self.initialize_task([
            self.range_record,
            self.range_record.replace(metric='problem_attempts_per_completed',
                                      low_value=8.0,
                                      high_value=10.1)
        ])
        # Identifiers are taken from the fixtures so the expectation matches setUp.
        self._check_output_complete_tuple([
            self.input_record.replace(
                problems_attempted=6,
                problem_attempts_per_completed=9).to_separated_values()
        ], (
            (self.course_id, self.username, '2014-03-25', '2014-04-01',
             'highly_engaged', 'problems_attempted'),
            (self.course_id, self.username, '2014-03-25', '2014-04-01',
             'struggling', 'problem_attempts_per_completed'),
        ))

    @data('problems_attempted', 'problems_completed', 'videos_viewed',
          'discussion_contributions')
    def test_highly_engaged(self, metric):
        self.initialize_task([self.range_record.replace(metric=metric)])
        self._check_output_by_record_field(
            [self.input_record.replace(**{metric: 8}).to_separated_values()],
            {'segment': 'highly_engaged'})

    @data(
        'problem_attempts',
        'problem_attempts_per_completed',
    )
    def test_not_highly_engaged(self, metric):
        self.initialize_task([self.range_record.replace(metric=metric)])
        output = self._get_reducer_output(
            [self.input_record.replace(**{metric: 8}).to_separated_values()])
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_too_low(self):
        self.initialize_task(
            [self.range_record.replace(metric='problems_completed')])
        output = self._get_reducer_output([
            self.input_record.replace(
                problems_completed=0).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_bottom(self):
        self.initialize_task([
            self.range_record.replace(metric='problems_completed',
                                      low_value=6.0)
        ])
        # The value sits exactly on the lower bound, which is expected to be inclusive.
        output = self._get_reducer_output([
            self.input_record.replace(
                problems_completed=6).to_separated_values()
        ])
        self.assert_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_top(self):
        self.initialize_task([
            self.range_record.replace(metric='problems_completed',
                                      high_value=9.0)
        ])
        # The value sits exactly on the upper bound, which is expected to be exclusive.
        output = self._get_reducer_output([
            self.input_record.replace(
                problems_completed=9).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_disengaging(self):
        self.initialize_task([])
        # Activity exists only for the week preceding the current interval.
        output = self._get_reducer_output([
            self.input_record.replace(
                start_date=self.prev_week_start_date,
                end_date=self.start_date,
                days_active=1,
            ).to_separated_values()
        ])
        self.assert_in_segment(output, 'disengaging')

    def test_not_disengaging_only_recent(self):
        self.initialize_task([])
        output = self._get_reducer_output(
            [self.input_record.replace(days_active=1).to_separated_values()])
        self.assert_not_in_segment(output, 'disengaging')

    def test_struggling(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed', )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=8.0).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_low_high_value(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf'),
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=float('inf')
            ).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=10.0).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high_value(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=float('inf')
            ).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_not_struggling(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed', )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=3.0).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'struggling')

    def test_not_struggling_infinite_low(self):
        self.initialize_task([
            self.range_record.replace(metric='problem_attempts_per_completed',
                                      low_value=float('inf'))
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=100000.0).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'struggling')
def requires(self):
    """Yield one tuple of LoadHiveTableToVertica tasks, one task per Hive table.

    Every task receives the same warehouse/schema/credential options taken
    from this task and is always created with ``overwrite=True``; its SQL
    schema comes from the record class paired with the table name.
    """
    shared_options = {
        'warehouse_path': self.warehouse_path,
        'overwrite': True,
        'schema': self.schema,
        'credentials': self.credentials,
        'read_timeout': self.read_timeout,
        'marker_schema': self.marker_schema,
    }

    # (table name, record class supplying the SQL schema, per-table extra options)
    table_specs = (
        ('course_activity', CourseActivityRecord, {}),
        ('course_enrollment_daily', EnrollmentDailyRecord, {}),
        ('course_enrollment_birth_year_daily', EnrollmentByBirthYearRecord, {}),
        ('course_enrollment_education_level_daily', EnrollmentByEducationLevelRecord, {}),
        ('course_enrollment_gender_daily', EnrollmentByGenderRecord, {}),
        ('course_enrollment_mode_daily', EnrollmentByModeRecord, {}),
        ('course_meta_summary_enrollment', CourseSummaryEnrollmentRecord, {}),
        ('course_program_metadata', CourseProgramMetadataRecord, {}),
        (
            'course_enrollment_location_current',
            LastCountryPerCourseRecord,
            {'load_from_latest_partition': False},
        ),
        ('module_engagement', ModuleEngagementRecord, {}),
        ('module_engagement_metric_ranges', ModuleEngagementSummaryMetricRangeRecord, {}),
        ('video_timeline', VideoTimelineRecord, {}),
        ('video', VideoSegmentSummaryRecord, {}),
        ('last_country_of_user_id', LastCountryOfUserRecord, {}),
        ('enterprise_enrollment', EnterpriseEnrollmentRecord, {}),
        ('enterprise_user', EnterpriseUserRecord, {}),
    )

    # A single tuple is yielded (not one task per yield), in table_specs order.
    yield tuple(
        LoadHiveTableToVertica(
            table_name=table_name,
            sql_schema=record_class.get_sql_schema(),
            **dict(shared_options, **extra_options)
        )
        for table_name, record_class, extra_options in table_specs
    )