def ScrubberConfigFromJson(codebase,
                           input_files,
                           config_json,
                           extension_to_scrubber_map=None,
                           default_scrubbers=None,
                           modify=False,
                           output_tar='',
                           temp_dir='',
                           **unused_kwargs):
    """Generate a ScrubberConfig object from a ScrubberConfig JSON object.

    The config is first built with ScrubberConfig's defaults, then every key
    present in config_json overrides the matching attribute, and finally
    ResetScrubbers() is called so the scrubbers reflect the loaded options.

    Args:
      codebase: forwarded to ScrubberConfig.
      input_files: forwarded to ScrubberConfig.
      config_json: dict; the parsed scrubber config. Its keys are validated
          against _SCRUBBER_CONFIG_KEYS. This function does not modify it.
      extension_to_scrubber_map: forwarded to ScrubberConfig and to
          ResetScrubbers.
      default_scrubbers: forwarded to ScrubberConfig and to ResetScrubbers.
      modify: forwarded to ScrubberConfig.
      output_tar: forwarded to ScrubberConfig.
      temp_dir: forwarded to ScrubberConfig.
      **unused_kwargs: accepted and ignored.

    Returns:
      The populated ScrubberConfig.

    Raises:
      KeyError: if u'empty_java_file_action' is not one of 'IGNORE', 'DELETE'
          or 'ERROR', or if a rename/remove/shebang entry lacks a required
          key.
      Whatever config_utils.CheckJsonKeys raises for unexpected keys
      (presumably a config error -- confirm against config_utils).
    """

    def SetOption(key, func=None):
        """Set an option in the config from JSON, using the enclosing scope.

        Args:
          key: unicode; the key in the JSON config and corresponding config
              attribute name.
          func: An optional transformation to apply to the JSON value before
              storing in the config.
        """
        if key in config_json:
            value = config_json[key]
            if func is not None:
                value = func(value)
            setattr(config, str(key), value)

    config_utils.CheckJsonKeys('scrubber config', config_json,
                               _SCRUBBER_CONFIG_KEYS)
    config = ScrubberConfig(codebase, input_files, extension_to_scrubber_map,
                            default_scrubbers, modify, output_tar, temp_dir)

    # General options.
    SetOption(u'ignore_files_re', func=re.compile)
    SetOption(u'do_not_scrub_files_re', func=re.compile)
    # Unlike the other options, sensitive_words is always assigned (an empty
    # list when the key is absent), so no SetOption call is needed for it.
    config.sensitive_words = config_json.get(u'sensitive_words', [])
    SetOption(u'extension_map',
              func=lambda m: [(re.compile(r), e) for r, e in m])
    SetOption(u'sensitive_res')

    sensitive_string_file = config_json.get(u'sensitive_string_file')
    if sensitive_string_file:
        sensitive_string_json = config_utils.ReadConfigFile(
            sensitive_string_file)
        config_utils.CheckJsonKeys('sensitive string config',
                                   sensitive_string_json,
                                   [u'sensitive_words', u'sensitive_res'])
        # Build new lists instead of extending in place: the current values
        # may be the very list objects stored in config_json, and extending
        # them would silently modify the caller's JSON (and duplicate the
        # file's entries if the same config_json is loaded again).
        config.sensitive_words = (
            list(config.sensitive_words) +
            list(sensitive_string_json.get(u'sensitive_words', [])))
        config.sensitive_res = (
            list(config.sensitive_res) +
            list(sensitive_string_json.get(u'sensitive_res', [])))

    whitelist_entries = []
    for entry in config_json.get(u'whitelist', []):
        config_utils.CheckJsonKeys('whitelist entry', entry,
                                   [u'filter', u'trigger', u'filename'])
        whitelist_entries.append((entry.get(u'filter', ''),
                                  entry.get(u'trigger', ''),
                                  entry.get(u'filename', '')))
    config.whitelist = whitelist.Whitelist(whitelist_entries)

    SetOption(u'scrub_sensitive_comments')
    SetOption(u'rearranging_config')
    SetOption(u'string_replacements')
    SetOption(u'regex_replacements')
    SetOption(u'scrub_non_documentation_comments')
    SetOption(u'scrub_all_comments')

    # User options.
    # TODO(dborowitz): Make the scrubbers pass unicode to the UsernameFilter.
    # TODO(dborowitz): Make these names consistent so we can use SetOption.
    if u'usernames_to_publish' in config_json:
        config.publishable_usernames = [
            str(u) for u in config_json[u'usernames_to_publish']]
    if u'usernames_to_scrub' in config_json:
        config.scrubbable_usernames = [
            str(u) for u in config_json[u'usernames_to_scrub']]
    SetOption(u'usernames_file')
    SetOption(u'scrub_unknown_users')
    SetOption(u'scrub_authors')
    SetOption(u'scrub_proto_comments')

    # C/C++-specific options.
    SetOption(u'c_includes_config_file')

    # Java-specific options.
    action_map = {
        'IGNORE': base.ACTION_IGNORE,
        'DELETE': base.ACTION_DELETE,
        'ERROR': base.ACTION_ERROR,
    }
    SetOption(u'empty_java_file_action', func=lambda a: action_map[a])
    SetOption(u'maximum_blank_lines')
    SetOption(u'scrub_java_testsize_annotations')
    config.java_renames = []
    for rename in config_json.get(u'java_renames', []):
        config_utils.CheckJsonKeys('java rename', rename,
                                   [u'internal_package', u'public_package'])
        config.java_renames.append(
            java_scrubber.JavaRenameScrubber(rename[u'internal_package'],
                                             rename[u'public_package']))

    # Javascript-specific options.
    # TODO(user): Remove js_directory_rename after all config files have been
    # migrated to use js_directory_renames.
    # The legacy single entry (if any) is handled first, then the list, so
    # the resulting order of config.js_directory_renames is unchanged.
    js_rename_entries = []
    legacy_js_rename = config_json.get(u'js_directory_rename')
    if legacy_js_rename is not None:
        js_rename_entries.append(legacy_js_rename)
    js_rename_entries.extend(config_json.get(u'js_directory_renames', []))
    for js_directory_rename in js_rename_entries:
        config_utils.CheckJsonKeys(
            'JS directory rename', js_directory_rename,
            [u'internal_directory', u'public_directory'])
        config.js_directory_renames.append(
            line_scrubber.JsDirectoryRename(
                js_directory_rename[u'internal_directory'],
                js_directory_rename[u'public_directory']))

    # Python-specific options.
    config.python_module_renames = []
    for rename in config_json.get(u'python_module_renames', []):
        config_utils.CheckJsonKeys(
            'python module rename', rename,
            [u'internal_module', u'public_module', u'as_name'])
        config.python_module_renames.append(
            python_scrubber.PythonModuleRename(
                rename[u'internal_module'],
                rename[u'public_module'],
                as_name=rename.get(u'as_name')))

    # TODO(dborowitz): Find out why these are singleton protobufs; possibly
    # flatten them.
    config.python_module_removes = []
    for remove in config_json.get(u'python_module_removes', []):
        config_utils.CheckJsonKeys('python module removal', remove,
                                   [u'import_module'])
        config.python_module_removes.append(
            python_scrubber.PythonModuleRemove(remove[u'import_module']))

    python_shebang_replace = config_json.get(u'python_shebang_replace')
    if python_shebang_replace is not None:
        config_utils.CheckJsonKeys('python shebang replacement',
                                   python_shebang_replace, [u'shebang_line'])
        config.python_shebang_replace = python_scrubber.PythonShebangReplace(
            python_shebang_replace[u'shebang_line'])

    # GWT-specific options.
    SetOption(u'scrub_gwt_inherits')

    # The constructor already built scrubbers from the default options;
    # rebuild them now that the JSON overrides have been applied.
    config.ResetScrubbers(extension_to_scrubber_map, default_scrubbers)
    return config
def __init__(self, codebase, input_files, extension_to_scrubber_map,
             default_scrubbers, modify, output_tar, temp_dir):
    """Initialise every option to its default, then build the scrubbers.

    Args:
      codebase: path to the codebase; stored as an absolute path.
      input_files: stored as-is on the instance.
      extension_to_scrubber_map: passed through to ResetScrubbers.
      default_scrubbers: passed through to ResetScrubbers.
      modify: stored as-is on the instance.
      output_tar: stored as-is on the instance.
      temp_dir: stored as-is on the instance.
    """
    # Constructor arguments and other object state.
    self.codebase = os.path.abspath(codebase)
    self.input_files = input_files
    self.modify = modify
    self.output_tar = output_tar
    self.temp_dir = temp_dir
    self._comment_scrubbers = None
    self._sensitive_string_scrubbers = None

    # General options.
    # When no ignore_files_re is given we want to ignore no files, i.e. the
    # pattern must match no strings; similarly for do_not_scrub_files_re.
    # '$a' is a regex that matches no strings.
    match_nothing = '$a'
    self.ignore_files_re = re.compile(match_nothing)
    self.do_not_scrub_files_re = re.compile(match_nothing)
    self.extension_map = []
    self.sensitive_words = []
    self.sensitive_res = []
    self.whitelist = whitelist.Whitelist([])
    self.scrub_sensitive_comments = True
    self.rearranging_config = {}
    self.string_replacements = []
    self.regex_replacements = []

    # Username options.
    self.scrubbable_usernames = None
    self.publishable_usernames = None
    self.usernames_file = None
    self.scrub_unknown_users = False
    self.scrub_authors = True
    self.scrub_proto_comments = False
    self.scrub_non_documentation_comments = False
    self.scrub_all_comments = False

    # C/C++-specific options.
    self.c_includes_config_file = None

    # Java-specific options.
    self.scrub_java_testsize_annotations = False
    self.maximum_blank_lines = 0
    self.empty_java_file_action = base.ACTION_IGNORE
    self.java_renames = []

    # Javascript-specific options.
    self.js_directory_renames = []

    # Python-specific options.
    self.python_module_renames = []
    self.python_module_removes = []
    self.python_shebang_replace = None

    # GWT-specific options.
    self.scrub_gwt_inherits = []

    # TODO(dborowitz): Make this a config option.
    self.known_filenames = {
        '.gitignore',
        'AUTHORS',
        'CONTRIBUTORS',
        'COPYING',
        'LICENSE',
        'Makefile',
        'README',
    }

    # Must come last: it is called once all defaults above are in place.
    self.ResetScrubbers(extension_to_scrubber_map, default_scrubbers)
def testReWhitelist(self):
    """A comment containing 'supersecret' is published with a RE whitelist.

    NOTE(review): the whitelist entry is a SENSITIVE_RE one, yet the scrubber
    under test comes from SensitiveWordCommentScrubber rather than
    SensitiveReCommentScrubber -- confirm this is intentional.
    """
    entries = [('SENSITIVE_RE', 'supersecret', 'foo')]
    allowed = whitelist.Whitelist(entries)
    comment_scrubber = self.SensitiveWordCommentScrubber(allowed)
    self.assertPublish(comment_scrubber, '// xxxsupersecretxxx')
def testCodeRes(self):
    """Check the sensitive-RE comment scrubber with an empty whitelist.

    The '// xxxsupersecretxxx' comment is checked with assertRevision
    (presumably expecting '' as the revised text -- confirm against the
    helper), while '// fine' is checked with assertPublish.
    """
    empty_whitelist = whitelist.Whitelist([])
    re_scrubber = self.SensitiveReCommentScrubber(empty_whitelist)
    self.assertRevision(re_scrubber, '', '// xxxsupersecretxxx')
    self.assertPublish(re_scrubber, '// fine')
def testWhitelistStar(self):
    """A '*' filename whitelist entry for 'testy' lets '// testy' publish."""
    entries = [('SENSITIVE_WORD', 'testy', '*')]
    allowed = whitelist.Whitelist(entries)
    word_scrubber = self.SensitiveWordCommentScrubber(allowed)
    self.assertPublish(word_scrubber, '// testy')
def testCodeWords(self):
    """Check the sensitive-word comment scrubber with an empty whitelist.

    The '// testy' comment is checked with assertRevision (presumably
    expecting '' as the revised text -- confirm against the helper), while
    '// fine' is checked with assertPublish.
    """
    empty_whitelist = whitelist.Whitelist([])
    word_scrubber = self.SensitiveWordCommentScrubber(empty_whitelist)
    self.assertRevision(word_scrubber, '', '// testy')
    self.assertPublish(word_scrubber, '// fine')