コード例 #1
0
 def test_get_filename_for_invalid_id(self):
     """Check the filename-safe form of INVALID_LEGACY_COURSE_ID.

     The result is asserted for the default separator and for an explicit '-'.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(
         INVALID_LEGACY_COURSE_ID)
     self.assertEqual(default_name, "org_course_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(
         INVALID_LEGACY_COURSE_ID, '-')
     self.assertEqual(hyphen_name, "org-course_id-course_run")
コード例 #2
0
 def test_get_filename(self):
     """Check the filename-safe form of VALID_COURSE_ID.

     The result is asserted for the default separator and for an explicit '-'.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(VALID_COURSE_ID)
     self.assertEqual(default_name, "org_course_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(VALID_COURSE_ID, '-')
     self.assertEqual(hyphen_name, "org-course_id-course_run")
コード例 #3
0
 def test_get_filename_with_default_separator(
         self, course_id, expected_filename, expected_filename_with_hyphen):
     """Check the filename-safe form of a supplied course id.

     Args:
         course_id: the course id passed to get_filename_safe_course_id.
         expected_filename: expected result when no separator is passed.
         expected_filename_with_hyphen: expected result when '-' is passed.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(course_id)
     self.assertEqual(default_name, expected_filename)

     hyphen_name = opaque_key_util.get_filename_safe_course_id(course_id, '-')
     self.assertEqual(hyphen_name, expected_filename_with_hyphen)
コード例 #4
0
 def test_get_filename_for_invalid_id(self):
     """Check the filename-safe form of INVALID_LEGACY_COURSE_ID.

     The result is asserted for the default separator and for an explicit '-'.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(INVALID_LEGACY_COURSE_ID)
     self.assertEqual(default_name, "org_course_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(INVALID_LEGACY_COURSE_ID, '-')
     self.assertEqual(hyphen_name, "org-course_id-course_run")
コード例 #5
0
 def test_get_filename_for_nonascii_id(self):
     """Check the filename-safe form of NONASCII_LEGACY_COURSE_ID.

     Both expected values contain U+FFFD; the result is asserted for the
     default separator and for an explicit '-'.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(NONASCII_LEGACY_COURSE_ID)
     self.assertEqual(default_name, u"org_course\ufffd_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(NONASCII_LEGACY_COURSE_ID, '-')
     self.assertEqual(hyphen_name, u"org-course\ufffd_id-course_run")
コード例 #6
0
 def test_get_filename_with_colon(self):
     """Check filename-safe output for a course id whose parts contain ':'."""
     course_id = unicode(
         CourseLocator(org='org', course='course:id', run='course:run'))

     # NOTE(review): this assertion uses VALID_COURSE_ID rather than the
     # colon-containing course_id built above -- confirm whether that is intended.
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(VALID_COURSE_ID)
     self.assertEqual(default_name, "org_course_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(course_id, '-')
     self.assertEqual(hyphen_name, "org-course-id-course-run")
コード例 #7
0
 def setUp(self):
     """Create a scratch directory and compute the paths used by the test."""
     super(DeidentificationAcceptanceTest, self).setUp()

     # Scratch directory; removed again through the registered cleanup.
     scratch_dir = tempfile.mkdtemp()
     self.temporary_dir = scratch_dir
     self.addCleanup(shutil.rmtree, scratch_dir)

     self.dump_root = url_path_join(self.test_src, 'course_exports', 'raw')
     safe_course_id = get_filename_safe_course_id(self.COURSE_ID)
     self.filename_safe_course_id = safe_course_id
     self.test_gpg_key_dir = url_path_join(self.test_root, 'gpg-keys')
コード例 #8
0
 def __init__(self, *args, **kwargs):
     """Initialise the task and precompute the URL holding the course's files."""
     super(ObfuscatedPackageTask, self).__init__(*args, **kwargs)

     safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
     self.filename_safe_course_id = safe_course_id
     # <obfuscated_output_root>/<format_version>/<filename-safe course id>
     self.course_files_url = url_path_join(
         self.obfuscated_output_root, self.format_version, safe_course_id)
コード例 #9
0
 def setUp(self):
     """Create a scratch directory and compute the paths used by the test."""
     super(ObfuscationAcceptanceTest, self).setUp()

     # Scratch directory; removed again through the registered cleanup.
     scratch_dir = tempfile.mkdtemp()
     self.temporary_dir = scratch_dir
     self.addCleanup(shutil.rmtree, scratch_dir)

     self.dump_root = url_path_join(self.test_src, 'course_exports', 'raw')
     safe_course_id = get_filename_safe_course_id(self.COURSE_ID)
     self.filename_safe_course_id = safe_course_id
     self.test_gpg_key_dir = url_path_join(self.test_root, 'gpg-keys')
コード例 #10
0
    def __init__(self, *args, **kwargs):
        """Initialise the task and locate the most recent 'state' dump.

        Sets ``data_directory`` (under ``dump_root``) and ``output_directory``
        (under ``output_root``) to the 'state/<latest date>' location of the course.

        Raises:
            Exception: if no auth_userprofile target is found under the dump path.
        """
        super(ObfuscatedCourseDumpTask, self).__init__(*args, **kwargs)

        safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
        dump_path = url_path_join(self.dump_root, safe_course_id, 'state')
        profile_targets = PathSetTask([dump_path], ['*auth_userprofile*']).output()

        # TODO: Refactor out this logic of getting latest file. Right now we expect a date, so we use that
        # The date is the path component directly above each target's file name.
        dates = []
        for target in profile_targets:
            dates.append(target.path.rsplit('/', 2)[-2])

        # TODO: Make the date a parameter that defaults to the most recent, but allows the user to override?
        # This should return an error if no data is found, rather than getting a cryptic 'index out of range' error.
        if not dates:
            raise Exception(
                'Missing auth_userprofile data file in {}'.format(dump_path))

        latest_date = sorted(dates)[-1]
        self.data_directory = url_path_join(
            self.dump_root, safe_course_id, 'state', latest_date)
        self.output_directory = url_path_join(
            self.output_root, safe_course_id, 'state', latest_date)
コード例 #11
0
 def setUp(self):
     """Create a scratch directory and compute the paths used by the test."""
     super(ObfuscationAcceptanceTest, self).setUp()

     # Scratch directory; removed again through the registered cleanup.
     scratch_dir = tempfile.mkdtemp()
     self.temporary_dir = scratch_dir
     self.addCleanup(shutil.rmtree, scratch_dir)

     self.dump_root = url_path_join(self.test_src, "course_exports", "raw")
     safe_course_id = get_filename_safe_course_id(self.COURSE_ID)
     self.filename_safe_course_id = safe_course_id
     self.test_gpg_key_dir = url_path_join(self.test_root, "gpg-keys")
コード例 #12
0
 def test_get_filename_for_nonascii_id(self):
     """Check filename-safe output for the two non-ASCII legacy course ids.

     Each id is checked with the default separator and with an explicit '-'.
     """
     # (positional arguments, expected filename), in the original assertion order.
     cases = [
         ((VALID_NONASCII_LEGACY_COURSE_ID,), u"org_cours__id_course_run"),
         ((VALID_NONASCII_LEGACY_COURSE_ID, '-'), u"org-cours-_id-course_run"),
         ((INVALID_NONASCII_LEGACY_COURSE_ID,), u"org_course__id_course_run"),
         ((INVALID_NONASCII_LEGACY_COURSE_ID, '-'), u"org-course-_id-course_run"),
     ]
     for call_args, expected in cases:
         # assertEqual is used instead of the deprecated assertEquals alias.
         self.assertEqual(
             opaque_key_util.get_filename_safe_course_id(*call_args), expected)
コード例 #13
0
    def run_task(self):
        """Run CourseContentTask with fake targets and unpack what it wrote.

        The contents of ``self.archive_root`` are packed into a temporary gzipped
        tarball that serves as the task input; the task's output archive is then
        extracted and ``self.output_course_root`` is set inside the extracted tree.
        """
        extract_root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, extract_root)

        with tempfile.NamedTemporaryFile() as archive_tmp:
            with tarfile.open(mode='w:gz', fileobj=archive_tmp) as tar_in:
                tar_in.add(self.archive_root, arcname='')
            # Rewind so the file can be read back from the beginning.
            archive_tmp.seek(0)

            ignored = sentinel.ignored
            task = obfuscate.CourseContentTask(
                course=ignored,
                output_directory=ignored,
                data_directory=ignored,
                auth_user_path=ignored,
                auth_userprofile_path=ignored,
            )

            # Replace the task's input/output lookups with fixed fake targets.
            task.input = MagicMock(
                return_value={'data': [LocalTarget(path=archive_tmp.name)]})
            fake_output = FakeTarget()
            task.output = MagicMock(return_value=fake_output)

            task.user_info_requirements = get_mock_user_info_requirements()
            reset_user_info_for_testing()
            task.run()

            with tarfile.open(mode='r:gz', fileobj=fake_output.buffer) as tar_out:
                tar_out.extractall(extract_root)

        safe_course_id = get_filename_safe_course_id(self.COURSE_ID)
        self.output_course_root = os.path.join(extract_root, safe_course_id)
コード例 #14
0
 def output_path_for_key(self, course_id):
     """Return the output URL for a course's '_enroll_validated_' log file.

     The file name combines the filename-safe course id (separator '_') with
     ``self.dump_date`` and is placed directly under ``self.output_root``.
     """
     safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id, '_')
     name_template = u'{course_id}_enroll_validated_{dumpdate}.log.gz'
     log_name = name_template.format(course_id=safe_course_id, dumpdate=self.dump_date)
     return url_path_join(self.output_root, log_name)
コード例 #15
0
    def output_path_for_key(self, course_id):
        """Return the output URL of the courseware_studentmodule SQL file for a course.

        The file name starts with the filename-safe course id (separator '-');
        when ``self.output_suffix`` is set it is inserted, followed by '-',
        before 'analytics.sql'.
        """
        safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id, '-')

        if self.output_suffix:
            suffix_part = self.output_suffix + '-'
        else:
            suffix_part = ''

        sql_name = "{course_id}-courseware_studentmodule-{suffix}analytics.sql".format(
            course_id=safe_course_id,
            suffix=suffix_part,
        )
        return url_path_join(self.output_root, sql_name)
コード例 #16
0
    def output_path_for_key(self, course_id):
        """Return the output URL of the courseware_studentmodule SQL file for a course.

        The file name starts with the filename-safe course id (separator '-');
        when ``self.output_suffix`` is set it is inserted, followed by '-',
        before 'analytics.sql'.
        """
        safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id, '-')

        if self.output_suffix:
            suffix_part = self.output_suffix + '-'
        else:
            suffix_part = ''

        sql_name = "{course_id}-courseware_studentmodule-{suffix}analytics.sql".format(
            course_id=safe_course_id,
            suffix=suffix_part,
        )
        return url_path_join(self.output_root, sql_name)
コード例 #17
0
    def requires_local(self):
        """Return a dict of required tasks keyed by name.

        'auth_user' is always present. 'explicit_events' is added only when
        ``self.explicit_event_whitelist`` differs from its own basename, i.e.
        it is not a bare file name.
        """
        safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
        state_url = url_path_join(self.dump_root, safe_course_id, 'state')
        requirements = {
            'auth_user': PathSetTask([state_url], ['*-auth_user-*']),
        }

        whitelist = self.explicit_event_whitelist
        if os.path.basename(whitelist) != whitelist:
            requirements['explicit_events'] = ExternalURL(url=whitelist)

        return requirements
コード例 #18
0
 def create_paths(self, course, dates):
     """Create the directory structure and files expected by the DeidentifyCourseDumpTask task.

     For every entry in ``dates`` an empty 'auth_userprofile_file' is created
     under <dump_root>/<filename-safe course id>/state/<date>/ inside a fresh
     temporary root directory.
     """
     temp_root = tempfile.mkdtemp()
     self.temp_rootdir = temp_root
     self.dump_root = os.path.join(temp_root, "dump_root")
     self.output_root = os.path.join(temp_root, "output_root")

     safe_course_id = opaque_key_util.get_filename_safe_course_id(course)
     for date in dates:
         filepath = os.path.join(
             self.dump_root, safe_course_id, 'state', date, 'auth_userprofile_file')
         os.makedirs(os.path.dirname(filepath))
         # Opening in append mode and closing right away leaves an empty file behind.
         with open(filepath, 'a'):
             pass
コード例 #19
0
    def output_path_for_key(self, key):
        """Return the output URL of the events log for ``key``.

        ``key`` is used as the date part of the file name; the file is placed in
        the 'events' directory of ``self.course`` under ``self.output_root``.
        """
        safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
        log_name = '{course}-events-{date}.log.gz'.format(
            course=safe_course_id, date=key)
        return url_path_join(self.output_root, safe_course_id, 'events', log_name)
コード例 #20
0
    def output_path_for_key(self, key):
        """Return the output URL of the events log for a (date, course_id) key.

        The file is placed in the course's 'events' directory under
        ``self.output_root``.
        """
        date, course_id = key
        safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id)
        log_name = "{course}-events-{date}.log.gz".format(course=safe_course_id, date=date)
        return url_path_join(self.output_root, safe_course_id, "events", log_name)
コード例 #21
0
ファイル: answer_dist.py プロジェクト: kolwinb/edx-insights
    def output_path_for_key(self, course_id):
        """
        Build the answer distribution CSV path in the layout the instructor dashboard expects.

        The instructor dashboard expects the file to be stored in a folder named sha1(course_id).  All files in that
        directory will be displayed on the instructor dashboard for that course.
        """
        # NOTE(review): course_id is hashed as-is; hashlib requires a bytes-like
        # value on Python 3 -- confirm the type callers pass.
        course_folder = hashlib.sha1(course_id).hexdigest()
        safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id, '_')
        csv_name = u'{course_id}_answer_distribution.csv'.format(course_id=safe_course_id)
        return url_path_join(self.output_root, course_folder, csv_name)
コード例 #22
0
    def output_path_for_key(self, key):
        """Return the output URL of the events log for a (date, course_id) key.

        The file is placed in the course's 'events' directory under
        ``self.output_root``.
        """
        date, course_id = key
        safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id)
        log_name = '{course}-events-{date}.log.gz'.format(course=safe_course_id, date=date)
        return url_path_join(self.output_root, safe_course_id, "events", log_name)
コード例 #23
0
    def output_path_for_key(self, course_id):
        """
        Build the answer distribution CSV path in the layout the instructor dashboard expects.

        The instructor dashboard expects the file to be stored in a folder named sha1(course_id).  All files in that
        directory will be displayed on the instructor dashboard for that course.
        """
        # NOTE(review): course_id is hashed as-is; hashlib requires a bytes-like
        # value on Python 3 -- confirm the type callers pass.
        course_folder = hashlib.sha1(course_id).hexdigest()
        safe_course_id = opaque_key_util.get_filename_safe_course_id(course_id, '_')
        csv_name = u'{course_id}_answer_distribution.csv'.format(course_id=safe_course_id)
        return url_path_join(self.output_root, course_folder, csv_name)
コード例 #24
0
 def create_paths(self, course, dates):
     """Create the directory structure and files expected by the ObfuscateCourseDumpTask task.

     For every entry in ``dates`` an empty 'auth_userprofile_file' is created
     under <dump_root>/<filename-safe course id>/state/<date>/ inside a fresh
     temporary root directory.
     """
     temp_root = tempfile.mkdtemp()
     self.temp_rootdir = temp_root
     self.dump_root = os.path.join(temp_root, "dump_root")
     self.output_root = os.path.join(temp_root, "output_root")

     safe_course_id = get_filename_safe_course_id(course)
     for date in dates:
         filepath = os.path.join(
             self.dump_root, safe_course_id, 'state', date, 'auth_userprofile_file')
         os.makedirs(os.path.dirname(filepath))
         # Opening in append mode and closing right away leaves an empty file behind.
         with open(filepath, 'a'):
             pass
コード例 #25
0
    def __init__(self, *args, **kwargs):
        """Initialise the task and locate the most recent 'state' dump.

        Sets ``data_directory`` (under ``dump_root``) and ``output_directory``
        (under ``output_root``) to the 'state/<latest date>' location of the course.

        Raises:
            Exception: if no auth_userprofile target with a YYYY-MM-DD date in
                its path is found under the dump path.
        """
        super(DeidentifiedCourseDumpTask, self).__init__(*args, **kwargs)

        filename_safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
        dump_path = url_path_join(self.dump_root, filename_safe_course_id, 'state')
        auth_userprofile_targets = PathSetTask([dump_path], ['*auth_userprofile*']).output()
        # TODO: Refactor out this logic of getting latest file. Right now we expect a date, so we use that
        date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
        date_matches = (date_pattern.search(target.path) for target in auth_userprofile_targets)
        # Targets whose path carries no date are skipped instead of failing on `None.group()`.
        dates = [match.group() for match in date_matches if match is not None]
        # TODO: Make the date a parameter that defaults to the most recent, but allows the user to override?
        # Fail with an explicit message rather than a cryptic 'index out of range' error.
        if not dates:
            raise Exception('Missing dated auth_userprofile data file in {}'.format(dump_path))
        latest_date = max(dates)
        self.data_directory = url_path_join(self.dump_root, filename_safe_course_id, 'state', latest_date)
        self.output_directory = url_path_join(self.output_root, filename_safe_course_id, 'state', latest_date)
コード例 #26
0
    def output_path_for_key(self, course_id):
        """
        Build the report path in the layout that is expected by the Analytics API.

        The Analytics API expects the problem response files to be stored in a
        folder named by the course_id, so we sanitize it to create the filename.

        Returns None when course_id is falsy.
        """
        if not course_id:
            return None

        report_name = self.report_filename_template.format(
            course_id=get_filename_safe_course_id(course_id))
        return url_path_join(self.output_root, report_name)
コード例 #27
0
    def output_path_for_key(self, key):
        """Return the output URL of the events log for ``key``.

        ``key`` is used as the date part of the file name; the file is placed in
        the 'events' directory of ``self.course`` under ``self.output_root``.
        """
        safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
        log_name = '{course}-events-{date}.log.gz'.format(course=safe_course_id, date=key)
        return url_path_join(self.output_root, safe_course_id, 'events', log_name)
コード例 #28
0
    def __init__(self, *args, **kwargs):
        """Initialise the task and locate the most recent 'state' dump.

        Sets ``data_directory`` (under ``dump_root``) and ``output_directory``
        (under ``output_root``) to the 'state/<latest date>' location of the course.

        Raises:
            Exception: if no auth_userprofile target is found under the dump path.
        """
        super(ObfuscatedCourseDumpTask, self).__init__(*args, **kwargs)

        safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
        dump_path = url_path_join(self.dump_root, safe_course_id, 'state')
        profile_targets = PathSetTask([dump_path], ['*auth_userprofile*']).output()

        # TODO: Refactor out this logic of getting latest file. Right now we expect a date, so we use that
        # The date is the path component directly above each target's file name.
        dates = []
        for target in profile_targets:
            dates.append(target.path.rsplit('/', 2)[-2])

        # TODO: Make the date a parameter that defaults to the most recent, but allows the user to override?
        # This should return an error if no data is found, rather than getting a cryptic 'index out of range' error.
        if not dates:
            raise Exception('Missing auth_userprofile data file in {}'.format(dump_path))

        latest_date = sorted(dates)[-1]
        self.data_directory = url_path_join(
            self.dump_root, safe_course_id, 'state', latest_date)
        self.output_directory = url_path_join(
            self.output_root, safe_course_id, 'state', latest_date)
コード例 #29
0
    def setUp(self):
        """Build a minimal course tree (course.xml plus an empty policy.json) in a temp dir."""
        archive_root = tempfile.mkdtemp()
        self.archive_root = archive_root
        self.addCleanup(shutil.rmtree, archive_root)

        course_root = os.path.join(archive_root, get_filename_safe_course_id(self.COURSE_ID))
        self.course_root = course_root
        os.makedirs(course_root)

        course_xml_path = os.path.join(course_root, 'course.xml')
        with open(course_xml_path, 'w') as xml_out:
            xml_out.write('<course url_name="foo" org="edX" course="DemoX"/>')

        policy_dir = os.path.join(course_root, 'policies', 'foo')
        os.makedirs(policy_dir)
        policy_path = os.path.join(policy_dir, 'policy.json')
        with open(policy_path, 'w') as json_out:
            # The policy file holds an empty JSON object.
            json.dump({}, json_out)
コード例 #30
0
    def test_database_export(self):
        """Run the export task, then run and validate the legacy exporter for each course."""
        # An S3 bucket to store the output in.
        assert 'exporter_output_bucket' in self.config

        self.load_data_from_file()
        self.run_export_task()

        filename_template = '{safe_course_id}-{table}-{suffix}-analytics.sql'
        for course_id in (self.COURSE_ID2, self.COURSE_ID):
            org_id = get_org_id_for_course(course_id).lower()
            self.run_legacy_exporter(org_id, course_id)

            safe_course_id = get_filename_safe_course_id(course_id, '-')
            expected_name = filename_template.format(
                safe_course_id=safe_course_id,
                table=self.TABLE,
                suffix=self.ENVIRONMENT,
            )
            self.validate_exporter_output(org_id, expected_name)
コード例 #31
0
    def test_database_export(self):
        """Run the export task, then run and validate the legacy exporter for each course."""
        # An S3 bucket to store the output in.
        assert 'exporter_output_bucket' in self.config

        self.load_data_from_file()
        self.run_export_task()

        filename_template = '{safe_course_id}-{table}-{suffix}-analytics.sql'
        for course_id in (self.COURSE_ID2, self.COURSE_ID):
            org_id = get_org_id_for_course(course_id).lower()
            self.run_legacy_exporter(org_id, course_id)

            safe_course_id = get_filename_safe_course_id(course_id, '-')
            expected_name = filename_template.format(
                safe_course_id=safe_course_id,
                table=self.TABLE,
                suffix=self.ENVIRONMENT,
            )
            self.validate_exporter_output(org_id, expected_name)
コード例 #32
0
    def setUp(self):
        """Build a minimal course tree (course.xml plus an empty policy.json) in a temp dir."""
        archive_root = tempfile.mkdtemp()
        self.archive_root = archive_root
        self.addCleanup(shutil.rmtree, archive_root)

        course_root = os.path.join(archive_root, get_filename_safe_course_id(self.COURSE_ID))
        self.course_root = course_root
        os.makedirs(course_root)

        course_xml_path = os.path.join(course_root, 'course.xml')
        with open(course_xml_path, 'w') as xml_out:
            xml_out.write('<course url_name="foo" org="edX" course="DemoX"/>')

        policy_dir = os.path.join(course_root, 'policies', 'foo')
        os.makedirs(policy_dir)
        policy_path = os.path.join(policy_dir, 'policy.json')
        with open(policy_path, 'w') as json_out:
            # The policy file holds an empty JSON object.
            json.dump({}, json_out)
コード例 #33
0
    def run_task(self):
        """Run CourseContentTask with fake targets and unpack what it wrote.

        The contents of ``self.archive_root`` are packed into a temporary gzipped
        tarball that serves as the task input; the task's output archive is then
        extracted and ``self.output_course_root`` is set inside the extracted tree.
        """
        extract_root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, extract_root)

        with tempfile.NamedTemporaryFile() as archive_tmp:
            with tarfile.open(mode='w:gz', fileobj=archive_tmp) as tar_in:
                tar_in.add(self.archive_root, arcname='')
            # Rewind so the file can be read back from the beginning.
            archive_tmp.seek(0)

            ignored = sentinel.ignored
            task = obfuscate.CourseContentTask(
                course=ignored,
                output_directory=ignored,
                data_directory=ignored,
                auth_user_path=ignored,
                auth_userprofile_path=ignored,
            )

            # Replace the task's input/output lookups with fixed fake targets.
            task.input = MagicMock(
                return_value={'data': [LocalTarget(path=archive_tmp.name)]})
            fake_output = FakeTarget()
            task.output = MagicMock(return_value=fake_output)

            task.user_info_requirements = get_mock_user_info_requirements()
            reset_user_info_for_testing()
            task.run()

            with tarfile.open(mode='r:gz', fileobj=fake_output.buffer) as tar_out:
                tar_out.extractall(extract_root)

        safe_course_id = get_filename_safe_course_id(self.COURSE_ID)
        self.output_course_root = os.path.join(extract_root, safe_course_id)
コード例 #34
0
 def test_get_filename(self):
     """Check the filename-safe form of VALID_COURSE_ID.

     The result is asserted for the default separator and for an explicit '-'.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(VALID_COURSE_ID)
     self.assertEqual(default_name, "org_course_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(VALID_COURSE_ID, '-')
     self.assertEqual(hyphen_name, "org-course_id-course_run")
コード例 #35
0
 def test_get_filename_with_colon(self):
     """Check filename-safe output for a course id whose parts contain ':'."""
     course_id = unicode(CourseLocator(org='org', course='course:id', run='course:run'))

     # NOTE(review): this assertion uses VALID_COURSE_ID rather than the
     # colon-containing course_id built above -- confirm whether that is intended.
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(VALID_COURSE_ID)
     self.assertEqual(default_name, "org_course_id_course_run")

     hyphen_name = opaque_key_util.get_filename_safe_course_id(course_id, '-')
     self.assertEqual(hyphen_name, "org-course-id-course-run")
コード例 #36
0
 def test_get_filename_with_default_separator(self, course_id, expected_filename, expected_filename_with_hyphen):
     """Check the filename-safe form of a supplied course id.

     Args:
         course_id: the course id passed to get_filename_safe_course_id.
         expected_filename: expected result when no separator is passed.
         expected_filename_with_hyphen: expected result when '-' is passed.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     default_name = opaque_key_util.get_filename_safe_course_id(course_id)
     self.assertEqual(default_name, expected_filename)

     hyphen_name = opaque_key_util.get_filename_safe_course_id(course_id, '-')
     self.assertEqual(hyphen_name, expected_filename_with_hyphen)
コード例 #37
0
 def output(self):
     """Return the target for 'metadata_file.json' of this course.

     The file lives under <obfuscated_output_root>/<format_version>/<filename-safe course id>/.
     """
     safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
     metadata_url = url_path_join(
         self.obfuscated_output_root,
         self.format_version,
         safe_course_id,
         'metadata_file.json',
     )
     return get_target_from_url(metadata_url)
コード例 #38
0
 def __init__(self, *args, **kwargs):
     """Initialise the task and precompute the URL holding the course's files."""
     super(ObfuscatedPackageTask, self).__init__(*args, **kwargs)

     safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
     self.filename_safe_course_id = safe_course_id
     # <obfuscated_output_root>/<format_version>/<filename-safe course id>
     self.course_files_url = url_path_join(
         self.obfuscated_output_root, self.format_version, safe_course_id)
コード例 #39
0
 def requires(self):
     """Return a PathSetTask over the course's 'events' URL under dump_root, using the '*' pattern."""
     safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
     return PathSetTask(
         [url_path_join(self.dump_root, safe_course_id, 'events')],
         ['*'],
     )
コード例 #40
0
 def output(self):
     """Return the target for 'metadata_file.json' of this course.

     The file lives under <obfuscated_output_root>/<format_version>/<filename-safe course id>/.
     """
     safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
     metadata_url = url_path_join(
         self.obfuscated_output_root,
         self.format_version,
         safe_course_id,
         'metadata_file.json',
     )
     return get_target_from_url(metadata_url)
コード例 #41
0
 def requires(self):
     """Return a PathSetTask over the course's 'events' URL under dump_root, using the '*' pattern."""
     safe_course_id = opaque_key_util.get_filename_safe_course_id(self.course)
     return PathSetTask(
         [url_path_join(self.dump_root, safe_course_id, 'events')],
         ['*'],
     )