def preview(self, mapper, kind, params):
    """Run a mapper over a small query sample without saving anything.

    Writes a before/after comparison (client dicts) to the response so an
    admin can inspect what the real mapreduce job would do.

    Args:
        mapper: mapper object exposing launch(preview_only=...) and
            do(context, entity).
        kind: entity kind string, passed through to self.api.get().
        params: request dict; 'preview' is stripped, 'n' (sample size)
            defaults to 10, everything else filters the query.
    """
    # pop() with a default avoids the KeyError the original
    # `del params['preview']` raised when callers omitted the flag.
    params.pop('preview', None)
    if 'n' not in params:
        params['n'] = 10  # default sample size

    # Set up a fake job context for the mapper.
    job_config = mapper.launch(preview_only=True)
    context = map_module.get_fake_context(job_config)

    # Get some entities to preview; capture state before and after mapping.
    results = self.api.get(kind, **params)
    before = [e.to_client_dict() for e in results]
    results = [mapper.do(context, e) for e in results]
    after = [e.to_client_dict() for e in results]

    self.write({
        'preview': True,
        'n': params['n'],
        'before': before,
        'after': after,
        'message': (
            "Warning: the results returned here are the result of a "
            "simple query-and-modify, not a true map reduce job. "
            "Also, no changes have been saved."),
    })
def test_lower_case_login(self):
    """Check that the mapper function properly changes all possible
    values of a user's auth_id and login_email."""
    num_users = 100
    email_length = 20
    for x in range(num_users):
        # Generate a random user, sometimes with string data, sometimes
        # with None.
        chars = string.digits + string.letters + string.punctuation
        # Use email_length (the original hard-coded 20, leaving the
        # constant unused) and a throwaway variable so the outer loop's
        # `x` isn't shadowed.
        rand_str = ''.join(
            random.choice(chars) for _ in range(email_length))
        email = rand_str + '@a.aa' if random.random() > 0.5 else None
        auth_id = 'direct_' + rand_str if random.random() > 0.5 else None
        user = User(login_email=email, auth_id=auth_id)

        # Set up a fake job context for the mapper.
        conf = map_module.lower_case_login(submit_job=False)
        context = map_module.get_fake_context(conf)

        # Manipulate the user.
        mapper = map_module.LowerCaseLoginMapper()
        user = mapper.do(context, user)

        # Check that the user has been manipulated properly.
        # (assertEqual: assertEquals is a deprecated alias.)
        if user.auth_id is not None:
            self.assertEqual(user.auth_id, user.auth_id.lower())
        if user.login_email is not None:
            self.assertEqual(user.login_email, user.login_email.lower())
def do(self):
    """Preview or launch the deidentify mapreduce job.

    With preview=True, runs the mapper over a query sample and returns a
    before/after comparison without saving; otherwise submits the real
    job and returns its job id.
    """
    params = util.get_request_dictionary(self.request)

    # The list values must be given in the GET or POST as
    # 'list_values_json' so they are interpreted as a list by
    # util.get_request_dictionary. Check that the list came through.
    # .get() ensures a missing key raises our descriptive Exception
    # instead of a bare KeyError.
    if not isinstance(params.get('list_values'), list):
        raise Exception("Parameter 'list_values_json' missing or invalid.")

    # Params not in this list will be used to filter previews.
    expected_keys = ['n', 'preview', 'list_name', 'list_values', 'salt']

    # Don't run the map reduce job, just show a sample of what it
    # would do.
    if 'preview' in params and params['preview'] is True:
        n = int(params['n']) if 'n' in params else 100

        # Set up a fake job context for the mapper.
        conf = map_module.deidentify(
            params['list_name'], params['list_values'], params['salt'],
            submit_job=False)
        context = map_module.get_fake_context(conf)

        # This function will modify the user if they should be deidentified
        # (if the user has the specified relationship).
        mapper = map_module.DeidentifyMapper()

        # Get some entities to preview.
        query = id_model.User.all()
        for k, v in params.items():
            if k not in expected_keys:
                query.filter(k + ' =', v)
        sample = query.fetch(n)

        before = [e.to_dict() for e in sample]
        results = [mapper.do(context, e) for e in sample]
        after = [e.to_dict() for e in results]

        return {
            'success': True,
            'preview': True,
            'n': n,
            'data': {
                'before': before,
                'after': after,
            },
            'message': (
                "Warning: the results returned here are the result of a "
                "simple query-and-modify, not a true map reduce job. "
                "Also, no changes have been saved."),
        }
    # Run it for real.
    else:
        conf = map_module.deidentify(
            params['list_name'], params['list_values'], params['salt'])
        return {'success': True, 'data': conf.job_id}
def do(self):
    """Preview or launch the lower-case-login mapreduce job.

    With preview=True, runs the mapper over a query sample and returns a
    before/after comparison without saving; otherwise submits the real
    job and returns its job id.
    """
    params = util.get_request_dictionary(self.request)

    # Don't run the map reduce job, just show a sample of what it
    # would do.
    if 'preview' in params and params['preview'] is True:
        n = int(params['n']) if 'n' in params else 100

        # Set up a fake job context for the mapper.
        conf = map_module.lower_case_login(submit_job=False)
        context = map_module.get_fake_context(conf)

        # This function will modify the entities.
        mapper = map_module.LowerCaseLoginMapper()

        # Get some entities to preview.
        query = core.User.all()
        for k, v in params.items():
            if k not in ['n', 'preview']:
                query.filter(k + ' =', v)
        sample = query.fetch(n)

        before = [e.to_dict() for e in sample]
        results = [mapper.do(context, e) for e in sample]
        # Fixed: 'after' is built from the mapper's output (results),
        # not the raw sample, matching the sibling preview handlers.
        after = [e.to_dict() for e in results]

        return {
            'success': True,
            'preview': True,
            'n': n,
            'data': {
                'before': before,
                'after': after,
            },
            'message': ("Warning: the results returned here are the result of a "
                        "simple query-and-modify, not a true map reduce job. "
                        "Also, no changes have been saved."),
        }
    # Run it for real.
    else:
        job_config = map_module.lower_case_login()
        return {'success': True, 'data': job_config.job_id}
def do(self, kind):
    """Preview or launch the cache-contents mapreduce job for one kind.

    Args:
        kind: 'cohort' or 'classroom'; anything else raises.
    """
    # kind must be one of these.
    if kind not in ['cohort', 'classroom']:
        raise Exception("Invalid kind: {}".format(kind))
    params = util.get_request_dictionary(self.request)

    # Don't run the map reduce job, just show a sample of what it
    # would do.
    if 'preview' in params and params['preview'] is True:
        n = int(params['n']) if 'n' in params else 1

        # Set up a fake job context for the mapper.
        conf = map_module.cache_contents(kind, submit_job=False)
        context = map_module.get_fake_context(conf)

        # This function will cache rosters and schedules via
        # api.get_roster().
        mapper = map_module.CacheContentsMapper()

        # Get some entities to preview.
        klass = core.kind_to_class(kind)
        query = klass.all()
        sample = query.fetch(n)
        # Fixed: invoke the mapper through .do(), as every other call
        # site does, rather than calling the instance directly.
        for e in sample:
            mapper.do(context, e)

        return {
            'success': True,
            'preview': True,
            'n': n,
            'data': {
                'entities_processed': [e.id for e in sample]
            },
            'message': ("Warning: this is the result of a limited preview. No "
                        "system-wide mapreduce job has been run."),
        }
    # Run it for real.
    else:
        # One job per kind; callers issue a separate request for each
        # supported kind.
        conf = map_module.cache_contents(kind)
        return {'success': True, 'data': conf.job_id}
def do(self):
    """Preview or launch the modify-pd mapreduce job.

    Currently disabled: returns immediately with a failure message (see
    ModifyPdHandler docstring / PR #268). The remaining code is kept so
    the tool can be re-enabled after further testing.
    """
    # Deliberate early return: everything below is intentionally
    # unreachable until the tool is vetted.
    return {'success': False,
            'message': "This tool needs further testing. See docstring of "
                       "ModifyPdHandler and pull request #268."}

    params = util.get_request_dictionary(self.request)
    to_match = params['to_match']
    to_change = params['to_change']

    # Must be at least a school admin to run this.
    user = self.get_current_user()
    if user.user_type not in ['god', 'researcher', 'school_admin']:
        raise core.PermissionDenied()

    # Although this mapper is written very generally and is capable of
    # changing any property of pd entities, we want to artificially limit
    # it to changing cohort and classroom, b/c that's all that our use
    # cases require.
    allowed_keys = set(['classroom', 'cohort'])
    illegal_keys = set(to_change.keys()).difference(allowed_keys)
    # Fixed: `== 0`, not `is 0` — identity comparison on ints only works
    # by CPython's small-int caching.
    if len(to_change.keys()) == 0 or len(illegal_keys) > 0:
        raise Exception("Not allowed to change {}".format(illegal_keys))

    # You must, at minimum, specify a single cohort and single program
    # (not a list) in to_match, otherwise the scope of changes would be
    # out of control.
    if 'program' not in to_match or type(to_match['program']) is not unicode:
        raise Exception("Must specify a single program in to_match.")
    if 'cohort' not in to_match or type(to_match['cohort']) is not unicode:
        raise Exception("Must specify a single cohort in to_match.")

    # Check permissions. To run this job, the user must have permission on
    # any cohorts in either to_match or to_change.
    # These functions will raise their own exceptions if necessary.
    user.can_put_pd(to_match['program'], to_match['cohort'])
    if 'cohort' in to_change:
        user.can_put_pd(to_match['program'], to_change['cohort'])

    # Preview: don't run the map reduce job, just show a sample of what it
    # would do.
    if 'preview' in params and params['preview'] is True:
        n = int(params['n']) if 'n' in params else 100

        # Set up a fake job context for the mapper.
        conf = map_module.modify_pd(to_match, to_change, submit_job=False)
        context = map_module.get_fake_context(conf)

        # This function will modify the entities.
        mapper = map_module.ModifyPdMapper()

        # Get some entities to preview.
        query = id_model.Pd.all()
        for k, v in to_match.items():
            if isinstance(v, list):
                # Limit the length of the list b/c app engine has issues.
                v = v[:30]
                query.filter(k + ' IN', v)
            else:
                query.filter(k + ' =', v)
        sample = query.fetch(n)

        before = [e.to_dict() for e in sample]
        results = [mapper.do(context, e) for e in sample]
        # Fixed: 'after' is built from the mapper's output (results),
        # not the raw sample, matching the sibling preview handlers.
        after = [e.to_dict() for e in results]

        return {
            'success': True,
            'preview': True,
            'n': n,
            'data': {
                'before': before,
                'after': after,
            },
            'message': (
                "Warning: the results returned here are the result of a "
                "simple query-and-modify, not a true map reduce job. "
                "Also, no changes have been saved."),
        }
    # Run it for real.
    else:
        job_config = map_module.modify_pd(to_match, to_change)
        return {'success': True, 'data': job_config.job_id}
def get_fake_context(self, kind, mapper):
    """Build a fake mapreduce context for previewing a modify-kind job."""
    job_config = map_module.modify_kind(kind, mapper, submit_job=False)
    fake_context = map_module.get_fake_context(job_config)
    return fake_context
def do(self, kind):
    """Preview or launch the aggregation-json fix-up job for one kind.

    Args:
        kind: 'user', 'activity', or 'cohort'; anything else raises.
    """
    # Only these kinds are supported.
    if kind not in ['user', 'activity', 'cohort']:
        raise Exception("Invalid kind: {}".format(kind))

    params = util.get_request_dictionary(self.request)

    # Params not in this list will be used to filter previews.
    expected_keys = ['n', 'preview']

    if 'preview' in params and params['preview'] is True:
        # Preview mode: query-and-modify a small sample, saving nothing.
        sample_size = int(params['n']) if 'n' in params else 100

        # Fake job context so the mapper can run outside a real job.
        conf = map_module.fix_aggregation_json(kind, submit_job=False)
        context = map_module.get_fake_context(conf)

        # Copies aggregation data to a new string property on the entity.
        mapper = map_module.AggregationJsonMapper()

        def summarize_entity(entity):
            # Only the properties this job touches.
            return {
                'id': entity.id,
                'aggregation_data': entity.aggregation_data,
                'aggregation_json': entity.aggregation_json
            }

        # Fetch a sample, filtering on any params beyond the expected ones.
        klass = core.kind_to_class(kind)
        query = klass.all()
        for key, value in params.items():
            if key not in expected_keys:
                query.filter(key + ' =', value)
        sample = query.fetch(sample_size)

        before = [summarize_entity(e) for e in sample]
        mapped = [mapper.do(context, e) for e in sample]
        after = [summarize_entity(e) for e in mapped]

        return {
            'success': True,
            'preview': True,
            'n': sample_size,
            'data': {
                'before': before,
                'after': after,
            },
            'message': ("Warning: the results returned here are the result of a "
                        "simple query-and-modify, not a true map reduce job. "
                        "Also, no changes have been saved."),
        }
    else:
        # Run it for real: one mapreduce job per kind (three kinds total,
        # submitted via separate requests).
        conf = map_module.fix_aggregation_json(kind)
        return {'success': True, 'data': conf.job_id}
def test_deidentify(self):
    """Check that the mapper function properly hashes requested users."""
    # When running this for real, a secret random salt will be specified
    # by the administrator issuing the job. For this test, we'll use a
    # dummy value.
    salt = u'salt'

    # Generate two (different) random cohort ids.
    id1 = id2 = ''
    while id1 == id2:
        id1 = Cohort.generate_id(phrase.generate_phrase())
        id2 = Cohort.generate_id(phrase.generate_phrase())

    # Set up each way a user could be associated with the cohort.
    loner = User(  # "loner" b/c no cohort associations
        first_name=u"William",
        last_name=u"Clinton",
        login_email=u"",
        stripped_first_name=util.clean_string(u"William"),
        stripped_last_name=util.clean_string(u"Clinton"),
        name=u"William",
        birth_date=datetime.date(1946, 8, 19),
        auth_id="",
        title="President",
        phone="(202) 456-1111",
        notes="This is Bill Clinton.",
        user_type="student",
    )
    standard = User(  # "standard" b/c one cohort association
        first_name=u"George",
        last_name=u"Bush",
        login_email=u"",
        stripped_first_name=util.clean_string(u"George"),
        stripped_last_name=util.clean_string(u"Bush"),
        name=u"George",
        birth_date=datetime.date(1946, 7, 6),
        auth_id="",
        title="President",
        phone="(202) 456-1111",
        notes="This is George Bush Jr.",
        assc_cohort_list=[id1],
        user_type="student",
    )
    dual = User(  # "dual" b/c two cohort associations
        first_name=u"Ban Ki-moon",
        last_name=u"\uBC18\uAE30\uBB38",
        login_email=u"",
        stripped_first_name=util.clean_string(u"Ban Ki-moon"),
        stripped_last_name=util.clean_string(u"\uBC18\uAE30\uBB38"),
        name=u"Ban",
        birth_date=datetime.date(1944, 6, 13),
        auth_id="google_123445345738",
        title="Secretary General",
        phone="(212) 963 1234",
        notes="This is Ban Ki-moon.",
        assc_cohort_list=[id1, id2],
        user_type="student",
    )
    adult = User(  # "adult" b/c user type teacher
        first_name=u"Barack",
        last_name=u"Obama",
        login_email=u"",
        stripped_first_name=util.clean_string(u"Barack"),
        stripped_last_name=util.clean_string(u"Obama"),
        name=u"Barack",
        birth_date=datetime.date(1961, 8, 4),
        auth_id="",
        title="President",
        phone="(202) 456-1111",
        notes="This is Barack Obama.",
        assc_cohort_list=[id1],
        user_type="teacher",
    )

    # Set up a fake job context for the mapper, requesting that all users
    # associated with the first cohort be deidentified.
    conf = map_module.deidentify('assc_cohort_list', [id1], salt,
                                 submit_job=False)
    context = map_module.get_fake_context(conf)
    mapper = map_module.DeidentifyMapper()

    # Manipulate each user (deep copies so originals stay pristine for
    # comparison).
    deidentified_loner = mapper.do(context, copy.deepcopy(loner))
    deidentified_standard = mapper.do(context, copy.deepcopy(standard))
    deidentified_dual = mapper.do(context, copy.deepcopy(dual))
    deidentified_adult = mapper.do(context, copy.deepcopy(adult))

    # Check that users not specified are not modified.
    self.assertEqual(loner, deidentified_loner)

    # Check that non-students are unchanged, even if they have the right
    # relationship.
    self.assertEqual(adult, deidentified_adult)

    # With modified users, these properties should be erased i.e. set
    # to ''. (Fixed: 'auth_id' was listed twice.)
    erased_properties = [
        'stripped_first_name', 'stripped_last_name', 'name', 'auth_id',
        'title', 'phone', 'notes'
    ]

    self.assertEqual(deidentified_standard.first_name,
                     mapper.hash(u"George", salt))
    self.assertEqual(deidentified_standard.last_name,
                     mapper.hash(u"Bush", salt))
    self.assertEqual(deidentified_standard.login_email,
                     mapper.hash(u"", salt))
    self.assertEqual(deidentified_standard.birth_date,
                     datetime.date(1946, 7, 1))
    for p in erased_properties:
        self.assertEqual(getattr(deidentified_standard, p), '')

    self.assertEqual(deidentified_dual.first_name,
                     mapper.hash(u"Ban Ki-moon", salt))
    self.assertEqual(deidentified_dual.last_name,
                     mapper.hash(u"\uBC18\uAE30\uBB38", salt))
    self.assertEqual(deidentified_dual.login_email,
                     mapper.hash(u"", salt))
    self.assertEqual(deidentified_dual.birth_date,
                     datetime.date(1944, 6, 1))
    for p in erased_properties:
        self.assertEqual(getattr(deidentified_dual, p), '')

    # If we run the process again, nothing should change b/c the job
    # should be idempotent.
    final_loner = mapper.do(context, copy.deepcopy(deidentified_loner))
    final_standard = mapper.do(context,
                               copy.deepcopy(deidentified_standard))
    final_dual = mapper.do(context, copy.deepcopy(deidentified_dual))
    final_adult = mapper.do(context, copy.deepcopy(deidentified_adult))

    self.assertEqual(final_loner, deidentified_loner)
    self.assertEqual(final_standard, deidentified_standard)
    self.assertEqual(final_dual, deidentified_dual)
    self.assertEqual(final_adult, deidentified_adult)