Example no. 1
    def preview(self, mapper, kind, params):
        del params['preview']
        if 'n' not in params:
            params['n'] = 10

        # Set up a fake job context for the mapper
        job_config = mapper.launch(preview_only=True)
        context = map_module.get_fake_context(job_config)

        # Get some entities to preview.
        results = self.api.get(kind, **params)
        before = [e.to_client_dict() for e in results]

        results = [mapper.do(context, e) for e in results]
        after = [e.to_client_dict() for e in results]

        self.write({
            'preview': True,
            'n': params['n'],
            'before': before,
            'after': after,
            'message': (
                "Warning: the results returned here are the result of a "
                "simple query-and-modify, not a true map reduce job. "
                "Also, no changes have been saved."),
        })
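As a usage sketch only (the handler instance, mapper object, kind, and filter key below are assumptions; the method merely requires a mapper exposing launch(preview_only=True) and do(context, entity)):

    # Hypothetical call: preview how a mapper would transform up to 5
    # student users without saving anything or starting a real job.
    handler.preview(
        mapper=some_mapper,   # assumed to implement launch() and do()
        kind='user',
        params={'preview': True, 'n': 5, 'user_type': 'student'})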
    def test_lower_case_login(self):
        """Check that the mapper function properly changes all possible values
        of a user's auth_id and login_email."""
        num_users = 100
        email_length = 20

        for x in range(num_users):
            # Generate a random user, sometimes with string data, sometimes
            # with None.
            chars = string.digits + string.letters + string.punctuation
            rand_str = ''.join(
                random.choice(chars) for _ in range(email_length))
            email = rand_str + '@a.aa' if random.random() > 0.5 else None
            auth_id = 'direct_' + rand_str if random.random() > 0.5 else None
            user = User(login_email=email, auth_id=auth_id)

            # Set up a fake job context for the mapper
            conf = map_module.lower_case_login(submit_job=False)
            context = map_module.get_fake_context(conf)

            # Manipulate the user
            mapper = map_module.LowerCaseLoginMapper()
            user = mapper.do(context, user)

            # Check that the user has been manipulated properly
            if user.auth_id is not None:
                self.assertEqual(user.auth_id, user.auth_id.lower())
            if user.login_email is not None:
                self.assertEqual(user.login_email, user.login_email.lower())
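The test above only pins down the mapper's observable behaviour; a minimal mapper consistent with it might look like the sketch below (illustrative, not the project's actual LowerCaseLoginMapper):

    class LowerCaseLoginMapperSketch(object):
        """Illustrative mapper: lower-case auth_id and login_email when set."""

        def do(self, context, user):
            if user.auth_id is not None:
                user.auth_id = user.auth_id.lower()
            if user.login_email is not None:
                user.login_email = user.login_email.lower()
            return user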
Example no. 3
    def do(self):
        params = util.get_request_dictionary(self.request)

        # The list values must be given in the GET or POST as
        # 'list_values_json' so they are interpreted as a list by
        # util.get_request_dictionary. Check that the list came through.
        if not isinstance(params.get('list_values'), list):
            raise Exception("Parameter 'list_values_json' missing or invalid.")

        # Params not in this list will be used to filter previews.
        expected_keys = ['n', 'preview', 'list_name', 'list_values', 'salt']

        # Don't run the map reduce job, just show a sample of what it
        # would do.
        if 'preview' in params and params['preview'] is True:
            n = int(params['n']) if 'n' in params else 100

            # Set up a fake job context for the mapper
            conf = map_module.deidentify(
                params['list_name'], params['list_values'], params['salt'],
                submit_job=False)
            context = map_module.get_fake_context(conf)

            # This function will modify the user if they should be deidentified
            # (if the user has the specified relationship).
            mapper = map_module.DeidentifyMapper()

            # Get some entities to preview.
            query = id_model.User.all()
            for k, v in params.items():
                if k not in expected_keys:
                    query.filter(k + ' =', v)
            sample = query.fetch(n)
            before = [e.to_dict() for e in sample]

            results = [mapper.do(context, e) for e in sample]
            after = [e.to_dict() for e in results]

            return {
                'success': True,
                'preview': True,
                'n': n,
                'data': {
                    'before': before,
                    'after': after,
                },
                'message': (
                    "Warning: the results returned here are the result of a "
                    "simple query-and-modify, not a true map reduce job. "
                    "Also, no changes have been saved."),
            }

        # Run it for real
        else:
            conf = map_module.deidentify(
                params['list_name'], params['list_values'], params['salt'])
            return {'success': True, 'data': conf.job_id}
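For orientation, the comment in the handler implies the list filter must arrive as 'list_values_json' and is decoded by util.get_request_dictionary into params['list_values']; a preview request for this handler might therefore carry parameters roughly like the following (all values are made up for illustration):

    preview_params = {
        'preview': True,
        'n': 10,
        'list_name': 'assc_cohort_list',
        'list_values_json': '["Cohort_abc123"]',  # decoded to a Python list
        'salt': 'some-secret-salt',
        'user_type': 'student',  # any key outside expected_keys becomes a filter
    }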
Example no. 4
    def do(self):
        params = util.get_request_dictionary(self.request)

        # Don't run the map reduce job, just show a sample of what it
        # would do.
        if 'preview' in params and params['preview'] is True:
            n = int(params['n']) if 'n' in params else 100

            # Set up a fake job context for the mapper
            conf = map_module.lower_case_login(submit_job=False)
            context = map_module.get_fake_context(conf)

            # This function will modify the entities.
            mapper = map_module.LowerCaseLoginMapper()

            # Get some entities to preview.
            query = core.User.all()
            for k, v in params.items():
                if k not in ['n', 'preview']:
                    query.filter(k + ' =', v)
            sample = query.fetch(n)
            before = [e.to_dict() for e in sample]

            results = [mapper.do(context, e) for e in sample]
            after = [e.to_dict() for e in results]

            return {
                'success': True,
                'preview': True,
                'n': n,
                'data': {
                    'before': before,
                    'after': after,
                },
                'message': (
                    "Warning: the results returned here are the result of a "
                    "simple query-and-modify, not a true map reduce job. "
                    "Also, no changes have been saved."),
            }

        # Run it for real
        else:
            job_config = map_module.lower_case_login()
            return {'success': True, 'data': job_config.job_id}
Example no. 5
    def do(self, kind):
        # kind must be one of these
        if kind not in ['cohort', 'classroom']:
            raise Exception("Invalid kind: {}".format(kind))

        params = util.get_request_dictionary(self.request)

        # Don't run the map reduce job, just show a sample of what it
        # would do.
        if 'preview' in params and params['preview'] is True:
            n = int(params['n']) if 'n' in params else 1

            # Set up a fake job context for the mapper
            conf = map_module.cache_contents(kind, submit_job=False)
            context = map_module.get_fake_context(conf)

            # This function will cache rosters and schedules via
            # api.get_roster()
            mapper = map_module.CacheContentsMapper()

            # Get some entities to preview.
            klass = core.kind_to_class(kind)
            query = klass.all()
            sample = query.fetch(n)
            for e in sample:
                mapper.do(context, e)

            return {
                'success': True,
                'preview': True,
                'n': n,
                'data': {
                    'entities_processed': [e.id for e in sample]
                },
                'message': (
                    "Warning: this is the result of a limited preview. No "
                    "system-wide mapreduce job has been run."),
            }

        # Run it for real
        else:
            # A separate mapreduce job must be run for each kind.
            conf = map_module.cache_contents(kind)
            return {'success': True, 'data': conf.job_id}
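The handler's comment is the only description of CacheContentsMapper in this example; a mapper along those lines would simply invoke the expensive roster call so its result lands in cache. A sketch under that assumption (how the mapper obtains an api object is not shown in the source):

    class CacheContentsMapperSketch(object):
        """Illustrative mapper: warm the roster cache for a cohort/classroom."""

        def do(self, context, entity):
            # api.get_roster() is named in the handler's comment; calling it is
            # assumed to populate the cache as a side effect. The entity itself
            # is returned unmodified.
            api.get_roster(entity.id)
            return entity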
Example no. 6
    def do(self):
        return {'success': False,
                'message': "This tool needs further testing. See docstring of "
                           "ModifyPdHandler and pull request #268."}

        params = util.get_request_dictionary(self.request)
        to_match = params['to_match']
        to_change = params['to_change']

        # Must be at least a school admin to run this.
        user = self.get_current_user()
        if user.user_type not in ['god', 'researcher', 'school_admin']:
            raise core.PermissionDenied()

        # Although this mapper is written very generally and is capable of
        # changing any property of pd entities, we want to artificially limit
        # it to changing cohort and classroom, b/c that's all that our use
        # cases require.
        allowed_keys = set(['classroom', 'cohort'])
        illegal_keys = set(to_change.keys()).difference(allowed_keys)
        if len(to_change) == 0 or len(illegal_keys) > 0:
            raise Exception("Not allowed to change {}".format(illegal_keys))

        # You must, at minimum, specify a single cohort and single program
        # (not a list) in to_match, otherwise the scope of changes would be
        # out of control.
        if 'program' not in to_match or type(to_match['program']) is not unicode:
            raise Exception("Must specify a single program in to_match.")
        if 'cohort' not in to_match or type(to_match['cohort']) is not unicode:
            raise Exception("Must specify a single cohort in to_match.")

        # Check permissions. To run this job, the user must have permission on
        # any cohorts in either to_match or to_change.
        # These functions will raise their own exceptions if necessary.
        user.can_put_pd(to_match['program'], to_match['cohort'])
        if 'cohort' in to_change:
            user.can_put_pd(to_match['program'], to_change['cohort'])

        # Preview: don't run the map reduce job, just show a sample of what it
        # would do.
        if 'preview' in params and params['preview'] is True:
            n = int(params['n']) if 'n' in params else 100

            # Set up a fake job context for the mapper
            conf = map_module.modify_pd(to_match, to_change, submit_job=False)
            context = map_module.get_fake_context(conf)

            # This function will modify the entities.
            mapper = map_module.ModifyPdMapper()

            # Get some entities to preview.
            query = id_model.Pd.all()
            for k, v in to_match.items():
                if isinstance(v, list):
                    # Limit the length of the list b/c app engine has issues.
                    v = v[:30]
                    query.filter(k + ' IN', v)
                else:
                    query.filter(k + ' =', v)
            sample = query.fetch(n)
            before = [e.to_dict() for e in sample]

            results = [mapper.do(context, e) for e in sample]
            after = [e.to_dict() for e in results]

            return {
                'success': True,
                'preview': True,
                'n': n,
                'data': {
                    'before': before,
                    'after': after,
                },
                'message': (
                    "Warning: the results returned here are the result of a "
                    "simple query-and-modify, not a true map reduce job. "
                    "Also, no changes have been saved."),
            }

        # Run it for real
        else:
            job_config = map_module.modify_pd(to_match, to_change)
            return {'success': True, 'data': job_config.job_id}
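For orientation, a mapper consistent with how modify_pd is used here would check each pd entity against to_match and overwrite the to_change properties. The sketch below is illustrative only; in particular, reading the parameters off the (fake) context via mapreduce_spec is an assumption about the job-config layout:

    class ModifyPdMapperSketch(object):
        """Illustrative mapper: apply to_change to pd entities matching to_match."""

        def do(self, context, pd):
            params = context.mapreduce_spec.mapper.params  # assumed layout
            to_match, to_change = params['to_match'], params['to_change']
            matches = all(
                getattr(pd, k, None) in v if isinstance(v, list)
                else getattr(pd, k, None) == v
                for k, v in to_match.items())
            if matches:
                for k, v in to_change.items():
                    setattr(pd, k, v)
            return pd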
Example no. 7
    def get_fake_context(self, kind, mapper):
        conf = map_module.modify_kind(kind, mapper, submit_job=False)
        return map_module.get_fake_context(conf)
Example no. 8
    def do(self, kind):
        # kind must be one of these
        if kind not in ['user', 'activity', 'cohort']:
            raise Exception("Invalid kind: {}".format(kind))

        params = util.get_request_dictionary(self.request)

        # Params not in this list will be used to filter previews.
        expected_keys = ['n', 'preview']

        # Don't run the map reduce job, just show a sample of what it
        # would do.
        if 'preview' in params and params['preview'] is True:
            n = int(params['n']) if 'n' in params else 100

            # Set up a fake job context for the mapper
            conf = map_module.fix_aggregation_json(kind, submit_job=False)
            context = map_module.get_fake_context(conf)

            # This function will modify the entity by copying aggregation data
            # to a new string property.
            mapper = map_module.AggregationJsonMapper()

            def summarize_entity(entity):
                return {
                    'id': entity.id,
                    'aggregation_data': entity.aggregation_data,
                    'aggregation_json': entity.aggregation_json
                }

            # Get some entities to preview.
            klass = core.kind_to_class(kind)
            query = klass.all()
            for k, v in params.items():
                if k not in expected_keys:
                    query.filter(k + ' =', v)
            sample = query.fetch(n)
            before = [summarize_entity(e) for e in sample]

            results = [mapper.do(context, e) for e in sample]

            after = [summarize_entity(e) for e in results]

            return {
                'success': True,
                'preview': True,
                'n': n,
                'data': {
                    'before': before,
                    'after': after,
                },
                'message': (
                    "Warning: the results returned here are the result of a "
                    "simple query-and-modify, not a true map reduce job. "
                    "Also, no changes have been saved."),
            }

        # Run it for real
        else:
            # Actually have to run THREE mapreduce jobs, one for each kind.
            conf = map_module.fix_aggregation_json(kind)
            return {'success': True, 'data': conf.job_id}
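Based on the comment and summarize_entity above, the mapper presumably serialises aggregation_data into the aggregation_json string property; a minimal sketch under that assumption (json.dumps is a guess at the encoding):

    import json

    class AggregationJsonMapperSketch(object):
        """Illustrative mapper: copy aggregation data into a JSON string property."""

        def do(self, context, entity):
            # Idempotent: re-serialising the same data yields the same string.
            entity.aggregation_json = json.dumps(entity.aggregation_data)
            return entity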
    def test_deidentify(self):
        """Check that the mapper function properly hashes requested users."""
        # When running this for real, a secret random salt will be specified
        # by the administrator issuing the job. For this test, we'll use a
        # dummy value.
        salt = u'salt'

        # Generate two (different) random cohort ids
        id1 = id2 = ''
        while id1 == id2:
            id1 = Cohort.generate_id(phrase.generate_phrase())
            id2 = Cohort.generate_id(phrase.generate_phrase())

        # Set up each way a user could be associated with the cohort.
        loner = User(  # "loner" b/c no cohort associations
            first_name=u"William",
            last_name=u"Clinton",
            login_email=u"",
            stripped_first_name=util.clean_string(u"William"),
            stripped_last_name=util.clean_string(u"Clinton"),
            name=u"William",
            birth_date=datetime.date(1946, 8, 19),
            auth_id="",
            title="President",
            phone="(202) 456-1111",
            notes="This is Bill Clinton.",
            user_type="student",
        )
        standard = User(  # "standard" b/c one cohort association
            first_name=u"George",
            last_name=u"Bush",
            login_email=u"",
            stripped_first_name=util.clean_string(u"George"),
            stripped_last_name=util.clean_string(u"Bush"),
            name=u"George",
            birth_date=datetime.date(1946, 7, 6),
            auth_id="",
            title="President",
            phone="(202) 456-1111",
            notes="This is George Bush Jr.",
            assc_cohort_list=[id1],
            user_type="student",
        )
        dual = User(  # "dual" b/c two cohort associations
            first_name=u"Ban Ki-moon",
            last_name=u"\uBC18\uAE30\uBB38",
            login_email=u"",
            stripped_first_name=util.clean_string(u"Ban Ki-moon"),
            stripped_last_name=util.clean_string(u"\uBC18\uAE30\uBB38"),
            name=u"Ban",
            birth_date=datetime.date(1944, 6, 13),
            auth_id="google_123445345738",
            title="Secretary General",
            phone="(212) 963 1234",
            notes="This is Ban Ki-moon.",
            assc_cohort_list=[id1, id2],
            user_type="student",
        )
        adult = User(  # "adult" b/c user type teacher
            first_name=u"Barack",
            last_name=u"Obama",
            login_email=u"",
            stripped_first_name=util.clean_string(u"Barack"),
            stripped_last_name=util.clean_string(u"Obama"),
            name=u"Barack",
            birth_date=datetime.date(1961, 8, 4),
            auth_id="",
            title="President",
            phone="(202) 456-1111",
            notes="This is Barack Obama.",
            assc_cohort_list=[id1],
            user_type="teacher",
        )

        # Set up a fake job context for the mapper, requesting that all users
        # associated with the first cohort be deidentified.
        conf = map_module.deidentify('assc_cohort_list', [id1],
                                     salt,
                                     submit_job=False)
        context = map_module.get_fake_context(conf)
        mapper = map_module.DeidentifyMapper()

        # Manipulate each user
        deidentified_loner = mapper.do(context, copy.deepcopy(loner))
        deidentified_standard = mapper.do(context, copy.deepcopy(standard))
        deidentified_dual = mapper.do(context, copy.deepcopy(dual))
        deidentified_adult = mapper.do(context, copy.deepcopy(adult))

        # Check that users not specified are not modified.
        self.assertEqual(loner, deidentified_loner)

        # Check that non-students are unchanged, even if they have the right
        # relationship.
        self.assertEqual(adult, deidentified_adult)

        # With modified users, these properties should be erased, i.e. set to ''.
        erased_properties = [
            'stripped_first_name', 'stripped_last_name', 'name', 'auth_id',
            'title', 'phone', 'notes'
        ]

        self.assertEqual(deidentified_standard.first_name,
                         mapper.hash(u"George", salt))
        self.assertEqual(deidentified_standard.last_name,
                         mapper.hash(u"Bush", salt))
        self.assertEqual(deidentified_standard.login_email,
                         mapper.hash(u"", salt))
        self.assertEqual(deidentified_standard.birth_date,
                         datetime.date(1946, 7, 1))

        for p in erased_properties:
            self.assertEqual(getattr(deidentified_standard, p), '')

        self.assertEqual(deidentified_dual.first_name,
                         mapper.hash(u"Ban Ki-moon", salt))
        self.assertEqual(deidentified_dual.last_name,
                         mapper.hash(u"\uBC18\uAE30\uBB38", salt))
        self.assertEqual(deidentified_dual.login_email, mapper.hash(u"", salt))
        self.assertEqual(deidentified_dual.birth_date,
                         datetime.date(1944, 6, 1))

        for p in erased_properties:
            self.assertEqual(getattr(deidentified_dual, p), '')

        # If we run the process again, nothing should change b/c the job
        # should be idempotent.
        final_loner = mapper.do(context, copy.deepcopy(deidentified_loner))
        final_standard = mapper.do(context,
                                   copy.deepcopy(deidentified_standard))
        final_dual = mapper.do(context, copy.deepcopy(deidentified_dual))
        final_adult = mapper.do(context, copy.deepcopy(deidentified_adult))

        self.assertEqual(final_loner, deidentified_loner)
        self.assertEqual(final_standard, deidentified_standard)
        self.assertEqual(final_dual, deidentified_dual)
        self.assertEqual(final_adult, deidentified_adult)
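The test treats mapper.hash(value, salt) as a deterministic one-way function over unicode strings; a stand-in with the same contract might be (SHA-256 and salt-prefixing are assumptions, not the project's actual scheme):

    import hashlib

    def hash_value(value, salt):
        """Illustrative salted hash with the same shape as mapper.hash()."""
        return hashlib.sha256((salt + value).encode('utf-8')).hexdigest()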