Beispiel #1
0
    def setUp(self):
        """Record the task class under test, run the parent setUp, and build a default task.

        The default task uses the local map-reduce engine and leaves every
        other parameter as ``None``.
        """
        self.task_class = AnswerDistributionOneFilePerCourseTask
        super(AnswerDistributionOneFilePerCourseTaskTest, self).setUp()

        default_params = dict(
            mapreduce_engine='local',
            src=None,
            dest=None,
            name=None,
            include=None,
            output_root=None,
        )
        self.task = AnswerDistributionOneFilePerCourseTask(**default_params)
 def test_output_path_for_opaque_key(self):
     """Check the per-course output path for a ``CourseLocator``-style course id.

     The expected path is
     ``/tmp/<sha1 hex digest of the course id>/foo_bar_baz_answer_distribution.csv``.
     """
     course_id = str(CourseLocator(org='foo', course='bar', run='baz'))
     hashed_course_id = hashlib.sha1(course_id).hexdigest()
     task = AnswerDistributionOneFilePerCourseTask(
         mapreduce_engine='local',
         src=[],
         dest=None,
         name='name',
         include=[],
         output_root='/tmp',
     )
     output_path = task.output_path_for_key(course_id)
     expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(hashed_course_id)
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(output_path, expected_output_path)
 def test_output_path_for_legacy_key(self):
     """Check the per-course output path for a slash-separated course id.

     The expected path is
     ``/tmp/<sha1 hex digest of the course id>/foo_bar_baz_answer_distribution.csv``.
     """
     course_id = 'foo/bar/baz'
     hashed_course_id = hashlib.sha1(course_id).hexdigest()
     task = AnswerDistributionOneFilePerCourseTask(
         mapreduce_engine='local',
         src=None,
         dest=None,
         name='name',
         include=None,
         output_root='/tmp',
     )
     output_path = task.output_path_for_key(course_id)
     expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(hashed_course_id)
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(output_path, expected_output_path)
Beispiel #4
0
 def test_delete_output_root(self):
     """The delete option can still be used to get rid of the output_root directory.

     After building the task with ``delete_output_root`` set, the task must
     report itself as not complete and the directory must no longer exist.
     """
     task_options = dict(
         mapreduce_engine='local',
         src=None,
         dest=None,
         name='name',
         include=None,
         output_root=self.output_root,
         delete_output_root="true",
         marker=self.output_root,
     )
     task = AnswerDistributionOneFilePerCourseTask(**task_options)

     self.assertFalse(task.complete())
     self.assertFalse(os.path.exists(self.output_root))
Beispiel #5
0
 def test_output_path_for_opaque_key(self):
     """Check the per-course output path for a ``CourseLocator``-style course id.

     The expected path is
     ``/tmp/<sha1 hex digest of the course id>/foo_bar_baz_answer_distribution.csv``.
     """
     course_id = str(CourseLocator(org='foo', course='bar', run='baz'))
     hashed_course_id = hashlib.sha1(course_id).hexdigest()
     task = AnswerDistributionOneFilePerCourseTask(
         mapreduce_engine='local',
         src=None,
         dest=None,
         name='name',
         include=None,
         output_root='/tmp',
     )
     output_path = task.output_path_for_key(course_id)
     expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(
         hashed_course_id)
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(output_path, expected_output_path)
 def test_delete_output_root(self):
     """The delete option can still be used to get rid of the output_root directory.

     After building the task with ``delete_output_root`` set, the task must
     report itself as not complete and the directory must no longer exist.
     """
     task_options = dict(
         mapreduce_engine='local',
         src=[],
         dest=None,
         name='name',
         include=[],
         output_root=self.output_root,
         delete_output_root="true",
         marker=self.output_root,
     )
     task = AnswerDistributionOneFilePerCourseTask(**task_options)

     self.assertFalse(task.complete())
     self.assertFalse(os.path.exists(self.output_root))
 def test_output_path_for_legacy_key(self):
     """Check the per-course output path for a slash-separated course id.

     The expected path is
     ``/tmp/<sha1 hex digest of the course id>/foo_bar_baz_answer_distribution.csv``.
     """
     course_id = 'foo/bar/baz'
     hashed_course_id = hashlib.sha1(course_id).hexdigest()
     task = AnswerDistributionOneFilePerCourseTask(
         mapreduce_engine='local',
         src=[],
         dest=None,
         name='name',
         include=[],
         output_root='/tmp',
     )
     output_path = task.output_path_for_key(course_id)
     expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(
         hashed_course_id)
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(output_path, expected_output_path)
    def setUp(self):
        """Record the task class under test, run the parent setUp, and build a default task.

        The default task uses the local map-reduce engine, empty ``src`` and
        ``include`` lists, and ``None`` for the remaining parameters.
        """
        self.task_class = AnswerDistributionOneFilePerCourseTask
        super(AnswerDistributionOneFilePerCourseTaskTest, self).setUp()

        default_params = dict(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name=None,
            include=[],
            output_root=None,
        )
        self.task = AnswerDistributionOneFilePerCourseTask(**default_params)
Beispiel #9
0
 def test_no_delete_output_root(self):
     """Not using the delete_output_root option will not delete the output_root.

     The directory must exist both before and after the task is constructed.
     """
     self.assertTrue(os.path.exists(self.output_root))

     task_options = dict(
         mapreduce_engine='local',
         src=None,
         dest=None,
         name='name',
         include=None,
         output_root=self.output_root,
     )
     # The instance itself is not needed; only the effect of constructing it is checked.
     AnswerDistributionOneFilePerCourseTask(**task_options)

     self.assertTrue(os.path.exists(self.output_root))
Beispiel #10
0
class AnswerDistributionOneFilePerCourseTaskTest(MapperTestMixin,
                                                 ReducerTestMixin, TestCase):
    """Tests for AnswerDistributionOneFilePerCourseTask class."""

    def setUp(self):
        """Record the task class under test, run the parent setUp, and build a default task."""
        self.task_class = AnswerDistributionOneFilePerCourseTask
        super(AnswerDistributionOneFilePerCourseTaskTest, self).setUp()

        self.task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=None,
            dest=None,
            name=None,
            include=None,
            output_root=None,
        )

    def _assert_output_path_for_key(self, course_id):
        """Assert that ``course_id`` maps to the expected CSV path under ``/tmp``.

        The expected path is
        ``/tmp/<sha1 hex digest of course_id>/foo_bar_baz_answer_distribution.csv``.
        """
        hashed_course_id = hashlib.sha1(course_id).hexdigest()
        task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=None,
            dest=None,
            name='name',
            include=None,
            output_root='/tmp',
        )
        output_path = task.output_path_for_key(course_id)
        expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(
            hashed_course_id)
        self.assertEqual(output_path, expected_output_path)

    def test_map_single_value(self):
        """A tab-separated input line maps to a single (key, value) output."""
        self.assert_single_map_output('foo\tbar', 'foo', 'bar')

    def test_reduce_multiple_values(self):
        """The reducer writes a header row followed by the input rows in sorted order."""
        field_names = AnswerDistributionPerCourseMixin.get_column_order()

        # To test sorting, the first sample is made to sort after the
        # second sample.
        column_values_2 = [(k, unicode(k) + u'\u2603') for k in field_names]
        column_values_2[3] = (column_values_2[3][0], 10)
        column_values_1 = list(column_values_2)
        column_values_1[4] = (column_values_1[4][0], u'ZZZZZZZZZZZ')
        sample_input_1 = json.dumps(dict(column_values_1))
        sample_input_2 = json.dumps(dict(column_values_2))
        mock_output_file = Mock()

        self.task.multi_output_reducer('foo',
                                       iter([sample_input_1, sample_input_2]),
                                       mock_output_file)

        # The first write is the header line built from the column order.
        expected_header_string = ','.join(field_names) + '\r\n'
        self.assertEqual(mock_output_file.write.mock_calls[0],
                         call(expected_header_string))

        # Confirm that the second sample appears before the first.
        expected_row_1 = ','.join(
            unicode(v[1]).encode('utf8') for v in column_values_2) + '\r\n'
        self.assertEqual(mock_output_file.write.mock_calls[1],
                         call(expected_row_1))
        expected_row_2 = ','.join(
            unicode(v[1]).encode('utf8') for v in column_values_1) + '\r\n'
        self.assertEqual(mock_output_file.write.mock_calls[2],
                         call(expected_row_2))

    def test_output_path_for_legacy_key(self):
        """Check the output path for a slash-separated course id."""
        self._assert_output_path_for_key('foo/bar/baz')

    def test_output_path_for_opaque_key(self):
        """Check the output path for a ``CourseLocator``-style course id."""
        self._assert_output_path_for_key(
            str(CourseLocator(org='foo', course='bar', run='baz')))
class AnswerDistributionOneFilePerCourseTaskTest(MapperTestMixin, ReducerTestMixin, TestCase):
    """Tests for AnswerDistributionOneFilePerCourseTask class."""

    def setUp(self):
        """Record the task class under test, run the parent setUp, and build a default task."""
        self.task_class = AnswerDistributionOneFilePerCourseTask
        super(AnswerDistributionOneFilePerCourseTaskTest, self).setUp()

        self.task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name=None,
            include=[],
            output_root=None,
        )

    def _assert_output_path_for_key(self, course_id):
        """Assert that ``course_id`` maps to the expected CSV path under ``/tmp``.

        The expected path is
        ``/tmp/<sha1 hex digest of course_id>/foo_bar_baz_answer_distribution.csv``.
        """
        hashed_course_id = hashlib.sha1(course_id).hexdigest()
        task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name='name',
            include=[],
            output_root='/tmp',
        )
        output_path = task.output_path_for_key(course_id)
        expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(hashed_course_id)
        self.assertEqual(output_path, expected_output_path)

    def test_map_single_value(self):
        """A tab-separated input line maps to a single (key, value) output."""
        self.assert_single_map_output('foo\tbar', 'foo', 'bar')

    def test_reduce_multiple_values(self):
        """The reducer writes a header row followed by the input rows in sorted order."""
        field_names = AnswerDistributionPerCourseMixin.get_column_order()

        # To test sorting, the first sample is made to sort after the
        # second sample.
        column_values_2 = [(k, unicode(k) + u'\u2603') for k in field_names]
        column_values_2[3] = (column_values_2[3][0], 10)
        column_values_1 = list(column_values_2)
        column_values_1[4] = (column_values_1[4][0], u'ZZZZZZZZZZZ')
        sample_input_1 = json.dumps(dict(column_values_1))
        sample_input_2 = json.dumps(dict(column_values_2))
        mock_output_file = Mock()

        self.task.multi_output_reducer('foo', iter([sample_input_1, sample_input_2]), mock_output_file)

        # The first write is the header line built from the column order.
        expected_header_string = ','.join(field_names) + '\r\n'
        self.assertEqual(mock_output_file.write.mock_calls[0], call(expected_header_string))

        # Confirm that the second sample appears before the first.
        expected_row_1 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_2) + '\r\n'
        self.assertEqual(mock_output_file.write.mock_calls[1], call(expected_row_1))
        expected_row_2 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_1) + '\r\n'
        self.assertEqual(mock_output_file.write.mock_calls[2], call(expected_row_2))

    def test_output_path_for_legacy_key(self):
        """Check the output path for a slash-separated course id."""
        self._assert_output_path_for_key('foo/bar/baz')

    def test_output_path_for_opaque_key(self):
        """Check the output path for a ``CourseLocator``-style course id."""
        self._assert_output_path_for_key(str(CourseLocator(org='foo', course='bar', run='baz')))