コード例 #1
0
    def setUp(self):
        """Prepare the fixtures shared by the tests.

        Stores the course/user identifiers and reporting dates, derives the
        reduce key from them, builds an all-zero summary record and a "high"
        range record for ``problems_attempted``, and instantiates the task
        under test.
        """
        course_id = 'foo/bar/baz'
        username = '******'
        week_start = datetime.date(2014, 3, 25)
        week_end = datetime.date(2014, 4, 1)

        self.course_id = course_id
        self.username = username
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = week_start
        self.date = week_end
        self.reduce_key = (course_id, username)

        # Every integer metric starts at zero; the ratio metric is the only float.
        zeroed_counts = dict.fromkeys(
            (
                'problem_attempts',
                'problems_attempted',
                'problems_completed',
                'videos_viewed',
                'discussion_contributions',
                'days_active',
            ),
            0
        )
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=course_id,
            username=username,
            start_date=week_start,
            end_date=week_end,
            problem_attempts_per_completed=0.0,
            **zeroed_counts
        )

        range_fields = {
            'course_id': course_id,
            'start_date': week_start,
            'end_date': week_end,
            'metric': 'problems_attempted',
            'range_type': 'high',
            'low_value': 5.0,
            'high_value': 10.0,
        }
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(**range_fields)

        task_kwargs = {
            'date': week_end,
            'output_root': self.DEFAULT_ARGS['output_root'],
        }
        self.task = self.task_class(**task_kwargs)  # pylint: disable=not-callable
コード例 #2
0
    def setUp(self):
        """Run the inherited setup, then install the reduce key and a zeroed summary record."""
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest, self).setUp()

        course_id = 'foo/bar/baz'
        self.reduce_key = course_id

        # All counters start at zero; the per-completion ratio is the only float field.
        record_fields = dict.fromkeys(
            (
                'problem_attempts',
                'problems_attempted',
                'problems_completed',
                'videos_viewed',
                'discussion_contributions',
                'days_active',
            ),
            0
        )
        record_fields.update(
            course_id=course_id,
            username='******',
            start_date=datetime.date(2014, 3, 25),
            end_date=datetime.date(2014, 4, 1),
            problem_attempts_per_completed=0.0,
        )
        self.input_record = ModuleEngagementSummaryRecord(**record_fields)
コード例 #3
0
    def setUp(self):
        """Extend the inherited setup with a course-level reduce key and a template record.

        The template record has every metric set to zero; tests derive their
        inputs from it.
        """
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest, self).setUp()

        course = 'foo/bar/baz'
        period_start = datetime.date(2014, 3, 25)
        period_end = datetime.date(2014, 4, 1)

        template_fields = {
            'course_id': course,
            'username': '******',
            'start_date': period_start,
            'end_date': period_end,
            'problem_attempts': 0,
            'problems_attempted': 0,
            'problems_completed': 0,
            'problem_attempts_per_completed': 0.0,
            'videos_viewed': 0,
            'discussion_contributions': 0,
            'days_active': 0,
        }

        self.reduce_key = course
        self.input_record = ModuleEngagementSummaryRecord(**template_fields)
コード例 #4
0
    def setUp(self):
        """Build the identifiers, dates, template records and task used by the tests.

        The task is created with ``overwrite_from_date`` set to the same day
        as ``self.date``.
        """
        self.course_id, self.username = 'foo/bar/baz', '******'
        self.reduce_key = (self.course_id, self.username)

        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)

        # Fields that the summary record and the range record have in common.
        shared_fields = {
            'course_id': self.course_id,
            'start_date': self.start_date,
            'end_date': self.date,
        }

        summary_fields = dict(shared_fields)
        summary_fields.update(
            username=self.username,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )
        self.input_record = ModuleEngagementSummaryRecord(**summary_fields)

        range_fields = dict(shared_fields)
        range_fields.update(
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0,
        )
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(**range_fields)

        output_root = self.DEFAULT_ARGS['output_root']
        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=output_root,
            overwrite_from_date=datetime.date(2014, 4, 1),
        )
コード例 #5
0
class ModuleEngagementUserSegmentDataTaskReducerTest(ReducerTestMixin, unittest.TestCase):
    """Reducer tests for ModuleEngagementUserSegmentDataTask.

    Each test loads metric ranges into the task with ``initialize_task``,
    feeds the reducer summary records derived from ``self.input_record`` and
    checks which segments appear in the emitted records.

    NOTE(review): the ``@data`` decorators used below presumably come from
    ``ddt``, which also needs a class-level decorator that is not visible in
    this excerpt -- confirm it is applied.
    """

    task_class = ModuleEngagementUserSegmentDataTask
    output_record_type = ModuleEngagementUserSegmentRecord

    def setUp(self):
        """Create the task under test and the template records used by every test."""
        self.course_id = 'foo/bar/baz'
        # Must agree with the username asserted in test_output_format.
        self.username = 'test_user'
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)

        self.reduce_key = (self.course_id, self.username)

        # Template summary record with every metric zeroed; tests override
        # individual fields with ``replace``.
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=self.course_id,
            username=self.username,
            start_date=self.start_date,
            end_date=self.date,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )

        # Template "high" range (5.0 to 10.0) for problems_attempted.
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(
            course_id=self.course_id,
            start_date=self.start_date,
            end_date=self.date,
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0
        )

        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=self.DEFAULT_ARGS['output_root'],
            overwrite_from_date=datetime.date(2014, 4, 1),
        )

    def initialize_task(self, metric_ranges):
        """Given a list of metric ranges, setup the task by calling init_local"""
        metric_ranges_text = '\n'.join([
            r.to_separated_values()
            for r in metric_ranges
        ])

        # Serve the serialized ranges as the task's local 'range_data' input.
        self.task.input_local = MagicMock(return_value={
            'range_data': FakeTarget(value=metric_ranges_text)
        })
        self.task.init_local()

    def test_init_local(self):
        # Three ranges are supplied (one of type 'low'); the assertion expects
        # high_metric_ranges to hold only the two 'high' ones, keyed by course
        # and then by metric.
        other_course_record = self.range_record.replace(
            course_id='another/course/id',
            metric='problems_completed'
        )
        self.initialize_task([
            self.range_record,
            self.range_record.replace(
                range_type='low',
                low_value=0.0,
                high_value=3.0
            ),
            other_course_record
        ])

        self.assertEqual(dict(self.task.high_metric_ranges), {
            self.course_id: {
                'problems_attempted': self.range_record
            },
            'another/course/id': {
                'problems_completed': other_course_record
            }
        })

    def test_init_local_empty_input(self):
        self.initialize_task([])
        self.assertEqual(dict(self.task.high_metric_ranges), {})

    def test_output_format(self):
        # One record that falls in both supplied ranges should produce exactly
        # these two output tuples.
        self.initialize_task([
            self.range_record,
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=8.0,
                high_value=10.1
            )
        ])
        self._check_output_complete_tuple(
            [
                self.input_record.replace(
                    problems_attempted=6,
                    problem_attempts_per_completed=9
                ).to_separated_values()
            ],
            (
                (
                    'foo/bar/baz',
                    'test_user',
                    '2014-03-25',
                    '2014-04-01',
                    'highly_engaged',
                    'problems_attempted'
                ),
                (
                    'foo/bar/baz',
                    'test_user',
                    '2014-03-25',
                    '2014-04-01',
                    'struggling',
                    'problem_attempts_per_completed'
                ),
            )
        )

    @data(
        'problems_attempted',
        'problems_completed',
        'videos_viewed',
        'discussion_contributions'
    )
    def test_highly_engaged(self, metric):
        # A value of 8 lies inside the template 5.0-10.0 range.
        self.initialize_task([
            self.range_record.replace(
                metric=metric
            )
        ])
        self._check_output_by_record_field(
            [
                self.input_record.replace(
                    **{metric: 8}
                ).to_separated_values()
            ],
            {
                'segment': 'highly_engaged'
            }
        )

    @data(
        'problem_attempts',
        'problem_attempts_per_completed',
    )
    def test_not_highly_engaged(self, metric):
        # Same in-range value as above, but these metrics must not yield the
        # 'highly_engaged' segment.
        self.initialize_task([
            self.range_record.replace(
                metric=metric
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    **{metric: 8}
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def assert_not_in_segment(self, output, segment):
        """Assert that the user was not put into the provided segment."""
        for record_tuple in output:
            record = self.output_record_type.from_string_tuple(record_tuple)
            self.assertNotEqual(record.segment, segment)

    def test_highly_engaged_too_low(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed'
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problems_completed=0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_bottom(self):
        # A value equal to low_value is expected to count as inside the range.
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed',
                low_value=6.0
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problems_completed=6
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'highly_engaged')

    def assert_in_segment(self, output, segment):
        """Assert that the user was put into the provided segment.

        Fails the test when none of the records in ``output`` carries
        ``segment``; an empty ``output`` therefore fails as well.
        """
        segments = [
            self.output_record_type.from_string_tuple(record_tuple).segment
            for record_tuple in output
        ]
        self.assertIn(segment, segments)

    def test_highly_engaged_left_closed_interval_top(self):
        # A value equal to high_value is expected to fall outside the range.
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed',
                high_value=9.0
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problems_completed=9
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_disengaging(self):
        # Only a record covering the previous week (with one active day) is
        # supplied; there is none for the current week.
        self.initialize_task([])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    start_date=self.prev_week_start_date,
                    end_date=self.start_date,
                    days_active=1,
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'disengaging')

    def test_not_disengaging_only_recent(self):
        # The only record supplied covers the current week.
        self.initialize_task([])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    days_active=1
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'disengaging')

    def test_struggling(self):
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=8.0
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_low_high_value(self):
        # Both bounds and the value itself are infinite.
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf'),
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=float('inf')
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high(self):
        # Finite value against a range whose upper bound is infinite.
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=10.0
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high_value(self):
        # Infinite value against a range whose upper bound is infinite.
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=float('inf')
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_not_struggling(self):
        # 3.0 is below the template range's low_value of 5.0.
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=3.0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'struggling')

    def test_not_struggling_infinite_low(self):
        # A large but finite value against an infinite lower bound.
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf')
            )
        ])
        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=100000.0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'struggling')
コード例 #6
0
class ModuleEngagementSummaryMetricRangesDataTaskReducerTest(ReducerTestMixin, unittest.TestCase):
    """Reducer tests for ModuleEngagementSummaryMetricRangesDataTask.

    Every test feeds the reducer summary records that differ only in
    ``problem_attempts_per_completed`` and checks the range records emitted
    for that metric.
    """

    task_class = ModuleEngagementSummaryMetricRangesDataTask
    output_record_type = ModuleEngagementSummaryMetricRangeRecord

    def setUp(self):
        """Run the inherited setup, then install the reduce key and a zeroed template record."""
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest, self).setUp()

        self.reduce_key = 'foo/bar/baz'
        self.input_record = ModuleEngagementSummaryRecord(
            course_id='foo/bar/baz',
            username='******',
            start_date=datetime.date(2014, 3, 25),
            end_date=datetime.date(2014, 4, 1),
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )

    def test_simple_distribution(self):
        # [0, 0, 0, 0] (these values are dropped from the set before analyzing)
        # [4, 13, 13, 13] (3 records are <= 13, this accounts for 15% of the total 20 non-zero values)
        # [15] * 4 (throw in a bunch of data in the "normal" range)
        # [50] * 11 (round out the 20 records with some other arbitrary value, note that this will also contain two
        #    of the three highest values)
        # [154] (throw in an outlier - a very high maximum value, this will show the high end of the range, but the
        #    85th percentile should be at the 50 value)
        values = [4] + ([13] * 3) + ([0] * 4) + ([15] * 4) + ([50] * 11) + [154]

        self.assert_ranges(
            values,
            [
                ('low', 0, 13.0),
                ('normal', 13.0, 50.0),
                ('high', 50.0, 'inf'),
            ]
        )

    def assert_ranges(self, values, range_values):
        """Assert that the reducer emits exactly the expected ranges for the metric.

        ``values`` is a list of ``problem_attempts_per_completed`` values, one
        per manufactured input record.  ``range_values`` is a list of
        ``(range_type, low, high)`` tuples describing every range expected in
        the output for that metric.  Output records for other metrics are
        ignored.
        """
        metric = 'problem_attempts_per_completed'

        # Manufacture some records with these values
        records = [self.input_record.replace(problem_attempts_per_completed=v).to_separated_values() for v in values]

        output = self._get_reducer_output(records)
        range_value_map = {rv[0]: rv for rv in range_values}
        emitted_range_types = []
        for record in output:
            if record[3] != metric:
                continue

            # Report an unexpected range type as a test failure rather than a KeyError.
            self.assertIn(record[4], range_value_map)
            range_type, low, high = range_value_map[record[4]]
            self.assertEqual(
                record,
                (
                    'foo/bar/baz',
                    '2014-03-25',
                    '2014-04-01',
                    metric,
                    range_type,
                    str(low),
                    str(high),
                )
            )
            emitted_range_types.append(range_type)

        # Without this check a missing range (or no output at all) would pass unnoticed.
        self.assertEqual(sorted(emitted_range_types), sorted(range_value_map))

    def test_identical_values(self):
        values = [5] * 6
        self.assert_ranges(values, [('low', 0, 5.0), ('normal', 5.0, 'inf')])

    def test_single_value(self):
        self.assert_ranges([1], [('low', 0, 1.0), ('normal', 1.0, 'inf')])

    def test_single_infinite_value(self):
        values = [float('inf')] * 3
        self.assert_ranges(values, [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_infinite_threshold_low_normal(self):
        values = [1, float('inf'), float('inf'), float('inf')]
        self.assert_ranges(values, [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_infinite_threshold_normal_high(self):
        values = [1, 2, 3, float('inf'), float('inf')]
        self.assert_ranges(values, [('low', 0, 1.6), ('normal', 1.6, 'inf'), ('high', 'inf', 'inf')])

    def test_infinite_value_in_high(self):
        values = [1, 2, 3, 4, 5, 6, 7, float('inf')]
        self.assert_ranges(values, [('low', 0, 2.05), ('normal', 2.05, 6.95), ('high', 6.95, 'inf')])

    def test_no_values(self):
        self.assert_ranges([], [('normal', 0, 'inf')])

    def test_single_zero_value(self):
        values = [0] * 3
        self.assert_ranges(values, [('normal', 0, 'inf')])

    def test_zeroes_are_normal(self):
        values = [1, 0, 0, 0]
        self.assert_ranges(values, [('normal', 0, 0.55), ('high', 0.55, 'inf')])

    def test_zeroes_are_low(self):
        values = [0, 0, 0] + ([1] * 10) + ([2]*4)
        self.assert_ranges(values, [('low', 0, 0.4), ('normal', 0.4, 2.0), ('high', 2.0, 'inf')])
コード例 #7
0
class ModuleEngagementUserSegmentDataTaskReducerTest(ReducerTestMixin,
                                                     unittest.TestCase):
    """Reducer tests for ModuleEngagementUserSegmentDataTask.

    Tests load metric ranges with ``initialize_task``, run the reducer on
    summary records derived from ``self.input_record`` and check the segments
    found in the emitted records.

    NOTE(review): the ``@data`` decorators used below presumably come from
    ``ddt``, which also needs a class-level decorator that is not visible in
    this excerpt -- confirm it is applied.
    """

    task_class = ModuleEngagementUserSegmentDataTask
    output_record_type = ModuleEngagementUserSegmentRecord

    def setUp(self):
        """Create the task under test and the template records used by every test."""
        self.course_id = 'foo/bar/baz'
        # Must agree with the username asserted in test_output_format.
        self.username = 'test_user'
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)

        self.reduce_key = (self.course_id, self.username)

        # Template summary record with every metric zeroed.
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=self.course_id,
            username=self.username,
            start_date=self.start_date,
            end_date=self.date,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )

        # Template "high" range (5.0 to 10.0) for problems_attempted.
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(
            course_id=self.course_id,
            start_date=self.start_date,
            end_date=self.date,
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0)

        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=self.DEFAULT_ARGS['output_root'])

    def initialize_task(self, metric_ranges):
        """Given a list of metric ranges, setup the task by calling init_local"""
        metric_ranges_text = '\n'.join(
            [r.to_separated_values() for r in metric_ranges])

        # Serve the serialized ranges as the task's local 'range_data' input.
        self.task.input_local = MagicMock(
            return_value={'range_data': FakeTarget(value=metric_ranges_text)})
        self.task.init_local()

    def _summary_line(self, **overrides):
        """Return the template summary record with ``overrides`` applied, serialized for the reducer."""
        return self.input_record.replace(**overrides).to_separated_values()

    def _reduce_summary(self, **overrides):
        """Run the reducer on a single summary record built from ``overrides``."""
        return self._get_reducer_output([self._summary_line(**overrides)])

    def _struggling_range(self, **overrides):
        """Return the template range retargeted at ``problem_attempts_per_completed``."""
        return self.range_record.replace(
            metric='problem_attempts_per_completed', **overrides)

    def test_init_local(self):
        # Three ranges are supplied (one of type 'low'); the assertion expects
        # high_metric_ranges to hold only the two 'high' ones.
        other_course_record = self.range_record.replace(
            course_id='another/course/id', metric='problems_completed')
        low_range = self.range_record.replace(
            range_type='low', low_value=0.0, high_value=3.0)
        self.initialize_task(
            [self.range_record, low_range, other_course_record])

        expected = {
            self.course_id: {
                'problems_attempted': self.range_record
            },
            'another/course/id': {
                'problems_completed': other_course_record
            }
        }
        self.assertEqual(dict(self.task.high_metric_ranges), expected)

    def test_init_local_empty_input(self):
        self.initialize_task([])
        self.assertEqual(dict(self.task.high_metric_ranges), {})

    def test_output_format(self):
        # One record that falls in both supplied ranges should produce exactly
        # these two output tuples.
        self.initialize_task([
            self.range_record,
            self._struggling_range(low_value=8.0, high_value=10.1),
        ])
        summary = self._summary_line(
            problems_attempted=6, problem_attempts_per_completed=9)
        self._check_output_complete_tuple([summary], (
            ('foo/bar/baz', 'test_user', '2014-03-25', '2014-04-01',
             'highly_engaged', 'problems_attempted'),
            ('foo/bar/baz', 'test_user', '2014-03-25', '2014-04-01',
             'struggling', 'problem_attempts_per_completed'),
        ))

    @data('problems_attempted', 'problems_completed', 'videos_viewed',
          'discussion_contributions')
    def test_highly_engaged(self, metric):
        # A value of 8 lies inside the template 5.0-10.0 range.
        self.initialize_task([self.range_record.replace(metric=metric)])
        self._check_output_by_record_field(
            [self._summary_line(**{metric: 8})],
            {'segment': 'highly_engaged'})

    @data(
        'problem_attempts',
        'problem_attempts_per_completed',
    )
    def test_not_highly_engaged(self, metric):
        # Same in-range value, but these metrics must not yield 'highly_engaged'.
        self.initialize_task([self.range_record.replace(metric=metric)])
        output = self._reduce_summary(**{metric: 8})
        self.assert_not_in_segment(output, 'highly_engaged')

    def assert_not_in_segment(self, output, segment):
        """Assert that the user was not put into the provided segment."""
        for record_tuple in output:
            record = self.output_record_type.from_string_tuple(record_tuple)
            self.assertNotEqual(record.segment, segment)

    def test_highly_engaged_too_low(self):
        self.initialize_task(
            [self.range_record.replace(metric='problems_completed')])
        output = self._reduce_summary(problems_completed=0)
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_bottom(self):
        # A value equal to low_value is expected to count as inside the range.
        self.initialize_task([
            self.range_record.replace(metric='problems_completed',
                                      low_value=6.0)
        ])
        output = self._reduce_summary(problems_completed=6)
        self.assert_in_segment(output, 'highly_engaged')

    def assert_in_segment(self, output, segment):
        """Assert that the user was put into the provided segment.

        Fails the test when none of the records in ``output`` carries
        ``segment``; an empty ``output`` therefore fails as well.
        """
        segments = [
            self.output_record_type.from_string_tuple(record_tuple).segment
            for record_tuple in output
        ]
        self.assertIn(segment, segments)

    def test_highly_engaged_left_closed_interval_top(self):
        # A value equal to high_value is expected to fall outside the range.
        self.initialize_task([
            self.range_record.replace(metric='problems_completed',
                                      high_value=9.0)
        ])
        output = self._reduce_summary(problems_completed=9)
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_disengaging(self):
        # Only a record covering the previous week (with one active day) is
        # supplied; there is none for the current week.
        self.initialize_task([])
        output = self._reduce_summary(
            start_date=self.prev_week_start_date,
            end_date=self.start_date,
            days_active=1,
        )
        self.assert_in_segment(output, 'disengaging')

    def test_not_disengaging_only_recent(self):
        # The only record supplied covers the current week.
        self.initialize_task([])
        output = self._reduce_summary(days_active=1)
        self.assert_not_in_segment(output, 'disengaging')

    def test_struggling(self):
        self.initialize_task([self._struggling_range()])
        output = self._reduce_summary(problem_attempts_per_completed=8.0)
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_low_high_value(self):
        # Both bounds and the value itself are infinite.
        self.initialize_task([
            self._struggling_range(
                low_value=float('inf'),
                high_value=float('inf'),
            )
        ])
        output = self._reduce_summary(
            problem_attempts_per_completed=float('inf'))
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high(self):
        # Finite value against a range whose upper bound is infinite.
        self.initialize_task(
            [self._struggling_range(high_value=float('inf'))])
        output = self._reduce_summary(problem_attempts_per_completed=10.0)
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high_value(self):
        # Infinite value against a range whose upper bound is infinite.
        self.initialize_task(
            [self._struggling_range(high_value=float('inf'))])
        output = self._reduce_summary(
            problem_attempts_per_completed=float('inf'))
        self.assert_in_segment(output, 'struggling')

    def test_not_struggling(self):
        # 3.0 is below the template range's low_value of 5.0.
        self.initialize_task([self._struggling_range()])
        output = self._reduce_summary(problem_attempts_per_completed=3.0)
        self.assert_not_in_segment(output, 'struggling')

    def test_not_struggling_infinite_low(self):
        # A large but finite value against an infinite lower bound.
        self.initialize_task(
            [self._struggling_range(low_value=float('inf'))])
        output = self._reduce_summary(
            problem_attempts_per_completed=100000.0)
        self.assert_not_in_segment(output, 'struggling')
コード例 #8
0
class ModuleEngagementSummaryMetricRangesDataTaskReducerTest(
        ReducerTestMixin, unittest.TestCase):
    """Reducer tests for the summary metric ranges task.

    Each test feeds the reducer records that differ only in
    ``problem_attempts_per_completed`` and checks the low/normal/high range
    tuples produced.
    """

    task_class = ModuleEngagementSummaryMetricRangesDataTask
    output_record_type = ModuleEngagementSummaryMetricRangeRecord

    def setUp(self):
        """Run the inherited setup, then install the reduce key and a zeroed template record."""
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest,
              self).setUp()

        course = 'foo/bar/baz'
        template_fields = {
            'course_id': course,
            'username': '******',
            'start_date': datetime.date(2014, 3, 25),
            'end_date': datetime.date(2014, 4, 1),
            'problem_attempts': 0,
            'problems_attempted': 0,
            'problems_completed': 0,
            'problem_attempts_per_completed': 0.0,
            'videos_viewed': 0,
            'discussion_contributions': 0,
            'days_active': 0,
        }

        self.reduce_key = course
        self.input_record = ModuleEngagementSummaryRecord(**template_fields)

    def test_simple_distribution(self):
        # The sample is assembled piece by piece:
        #   4 and three 13s - 3 records are <= 13, i.e. 15% of the 20 non-zero values
        #   four 0s         - dropped from the set before analyzing
        #   four 15s        - data in the "normal" range
        #   eleven 50s      - rounds out the 20 records; also holds two of the three highest values
        #   one 154         - outlier maximum; the 85th percentile should still be at 50
        sample = [4]
        sample += [13] * 3
        sample += [0] * 4
        sample += [15] * 4
        sample += [50] * 11
        sample += [154]

        self.assert_ranges(sample, 13.0, 50.0)

    def assert_ranges(self, values, low, high):
        """Check the complete reducer output for records carrying ``values``.

        Expects exactly three ranges for ``problem_attempts_per_completed``:
        "low" from 0 to ``low``, "normal" from ``low`` to ``high`` and "high"
        from ``high`` to infinity.
        """
        # One input record per value, identical to the template otherwise.
        serialized_inputs = []
        for value in values:
            record = self.input_record.replace(
                problem_attempts_per_completed=value)
            serialized_inputs.append(record.to_separated_values())

        low_text = str(low)
        high_text = str(high)
        common_prefix = (
            'foo/bar/baz',
            '2014-03-25',
            '2014-04-01',
            'problem_attempts_per_completed',
        )
        range_suffixes = (
            ('low', '0', low_text),
            ('normal', low_text, high_text),
            ('high', high_text, 'inf'),
        )
        expected_output = tuple(
            common_prefix + suffix for suffix in range_suffixes)

        self._check_output_complete_tuple(serialized_inputs, expected_output)

    def test_identical_values(self):
        self.assert_ranges([5] * 6, 5.0, 5.0)

    def test_single_value(self):
        lone_value = [1]
        self.assert_ranges(lone_value, 1.0, 1.0)

    def test_very_small_values(self):
        tiny_values = ([0.01] * 10) + ([0.09] * 10)
        self.assert_ranges(tiny_values, 0.01, 0.09)

    def test_infinite_value(self):
        mostly_ones = ([1.0] * 19) + [float('inf')]
        self.assert_ranges(mostly_ones, 1.0, 1.0)
コード例 #9
0
class ModuleEngagementSummaryMetricRangesDataTaskReducerTest(ReducerTestMixin, unittest.TestCase):
    """Base class for test analysis of student engagement summaries"""

    task_class = ModuleEngagementSummaryMetricRangesDataTask
    output_record_type = ModuleEngagementSummaryMetricRangeRecord

    def setUp(self):
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest, self).setUp()

        course_id = 'foo/bar/baz'

        # The key passed to the reducer is the bare course ID.
        self.reduce_key = course_id

        # Template record with every engagement metric zeroed; tests copy it via ``replace`` and override the
        # metric they are interested in.
        zeroed_metrics = dict(
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=course_id,
            username='******',
            start_date=datetime.date(2014, 3, 25),
            end_date=datetime.date(2014, 4, 1),
            **zeroed_metrics
        )

    def test_simple_distribution(self):
        # [0, 0, 0, 0] (these values are dropped from the set before analyzing)
        # [4, 13, 13, 13] (3 records are <= 13, this accounts for 15% of the total 20 non-zero values)
        # [15] * 4 (throw in a bunch of data in the "normal" range)
        # [50] * 11 (round out the 20 records with some other arbitrary value, note that this will also contain two
        #    of the three highest values)
        # [154] (throw in an outlier - a very high maximum value, this will show the high end of the range, but the
        #    85th percentile should be at the 50 value)
        values = [4] + ([13] * 3) + ([0] * 4) + ([15] * 4) + ([50] * 11) + [154]

        self.assert_ranges(
            values,
            [
                ('low', 0, 13.0),
                ('normal', 13.0, 50.0),
                ('high', 50.0, 'inf'),
            ]
        )

    def assert_ranges(self, values, range_values):
        """Check the ``problem_attempts_per_completed`` rows found in the reducer output.

        One input record is manufactured per entry of ``values`` by copying ``self.input_record`` with that entry
        as its ``problem_attempts_per_completed``; the serialized records are passed to
        ``self._get_reducer_output``.  Every output row whose metric (index 3) is
        ``problem_attempts_per_completed`` is then compared against the expectation registered for its range type
        (index 4).  Rows for other metrics are ignored.

        Arguments:
            values: metric values, one per manufactured input record.
            range_values: iterable of ``(range_type, low, high)`` tuples; ``low`` and ``high`` are converted with
                ``str()`` before comparison.

        Raises:
            KeyError: if the output contains a range type that is not listed in ``range_values``.

        NOTE(review): only rows that are actually emitted get checked.  An expected range that is missing from the
        output, or an output with no rows for this metric at all, does not cause a failure here.
        """
        metric = 'problem_attempts_per_completed'

        # Manufacture some records with these values
        records = []
        for value in values:
            records.append(self.input_record.replace(problem_attempts_per_completed=value).to_separated_values())

        output = self._get_reducer_output(records)

        # Index the expectations by range type so each output row can look up its own.
        expected_by_type = dict((expectation[0], expectation) for expectation in range_values)

        for record in output:
            if record[3] != metric:
                continue

            range_type, low, high = expected_by_type[record[4]]
            expected_record = (
                'foo/bar/baz',
                '2014-03-25',
                '2014-04-01',
                'problem_attempts_per_completed',
                range_type,
                str(low),
                str(high),
            )
            self.assertEqual(record, expected_record)

    def test_identical_values(self):
        values = [5] * 6
        self.assert_ranges(values, [('low', 0, 5.0), ('normal', 5.0, 'inf')])

    def test_single_value(self):
        self.assert_ranges([1], [('low', 0, 1.0), ('normal', 1.0, 'inf')])

    def test_single_infinite_value(self):
        values = [float('inf')] * 3
        self.assert_ranges(values, [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_infinite_threshold_low_normal(self):
        values = [1, float('inf'), float('inf'), float('inf')]
        self.assert_ranges(values, [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_infinite_threshold_normal_high(self):
        values = [1, 2, 3, float('inf'), float('inf')]
        self.assert_ranges(values, [('low', 0, 1.6), ('normal', 1.6, 'inf'), ('high', 'inf', 'inf')])

    def test_infinite_value_in_high(self):
        values = [1, 2, 3, 4, 5, 6, 7, float('inf')]
        self.assert_ranges(values, [('low', 0, 2.05), ('normal', 2.05, 6.95), ('high', 6.95, 'inf')])

    def test_no_values(self):
        self.assert_ranges([], [('normal', 0, 'inf')])

    def test_single_zero_value(self):
        values = [0] * 3
        self.assert_ranges(values, [('normal', 0, 'inf')])

    def test_zeroes_are_normal(self):
        values = [1, 0, 0, 0]
        self.assert_ranges(values, [('normal', 0, 0.55), ('high', 0.55, 'inf')])

    def test_zeroes_are_low(self):
        values = [0, 0, 0] + ([1] * 10) + ([2]*4)
        self.assert_ranges(values, [('low', 0, 0.4), ('normal', 0.4, 2.0), ('high', 2.0, 'inf')])