def get_target_paths(self):
    """
    Return the validated "target_paths" of this configuration.

    Within a network_location, we match URLs with this list of regular
    expressions, which tell us to map from a source URL to a target URL.
    If there are multiple regular expressions which match a source URL, the
    order of appearance will be used to resolve ambiguity.

    Returns:
      The list stored under "target_paths" in self.configuration, or a list
      holding a single identity mapping when that key is absent.

    Raises:
      AssertionError: "target_paths" is not a list.
      InvalidConfiguration: an entry is not a dictionary with exactly one
        key/value pair.
    """

    INVALID_TARGET_PATH = "Invalid target path in {network_location}!"

    # An "identity" capture from source URL to target URL.
    WILD_TARGET_PATH = {"(.*)": "{0}"}

    target_paths = self.configuration.get("target_paths", [WILD_TARGET_PATH])

    # target_paths: [ target_path, ... ]
    # Raised explicitly instead of through an assert statement so that the
    # check is not stripped when Python runs with -O; the exception type seen
    # by callers is unchanged.
    if not isinstance(target_paths, list):
        raise AssertionError("target_paths must be a list")

    for target_path in target_paths:
        try:
            # target_path: { "regex_with_groups": "target_with_group_captures" }
            # e.g. { ".*(/some/directory)/$": "{0}/index.html" }
            if not isinstance(target_path, dict):
                raise AssertionError("target_path must be a dictionary")
            if len(target_path) != 1:
                raise AssertionError("target_path must have exactly one entry")
        except Exception:
            error_message = INVALID_TARGET_PATH.format(
                network_location=self.network_location)
            Logger.exception(error_message)
            raise InvalidConfiguration(error_message)

    return target_paths
def get(self, url):
    """Get an Updater, if any, for this URL.

    Assumptions:
      - @url is a string.

    Returns:
      The updater stored under the URL's hostname, provided that its
      configured network location equals either the URL's netloc or
      "hostname:port" (port defaulting to 80) and that
      updater.get_target_filepath(url) does not raise.  Otherwise None.

    Any Exception raised along the way is logged and turned into a None
    result.  KeyboardInterrupt and SystemExit are not swallowed.
    """

    GENERIC_WARNING_MESSAGE = "No updater or interposition for url={url}"
    DIFFERENT_NETLOC_MESSAGE = "We have an updater for netloc={netloc1} but not for netlocs={netloc2}"
    HOSTNAME_FOUND_MESSAGE = "Found updater for hostname={hostname}"
    HOSTNAME_NOT_FOUND_MESSAGE = "No updater for hostname={hostname}"

    updater = None

    try:
        parsed_url = urlparse.urlparse(url)
        hostname = parsed_url.hostname
        port = parsed_url.port or 80
        netloc = parsed_url.netloc
        network_location = "{hostname}:{port}".format(hostname=hostname,
                                                      port=port)

        # Sometimes parsed_url.netloc does not have a port (e.g. 80),
        # so we do a double check.
        network_locations = set((netloc, network_location))

        updater = self.__updaters.get(hostname)

        if updater is None:
            Logger.warn(HOSTNAME_NOT_FOUND_MESSAGE.format(hostname=hostname))

        # Ensure that the updater is meant for this (hostname, port).
        elif updater.configuration.network_location in network_locations:
            Logger.info(HOSTNAME_FOUND_MESSAGE.format(hostname=hostname))

            # Called only for its side effect: it raises an exception in
            # case we do not recognize how to transform this URL for TUF.
            # In that case, there will be no updater for this URL.
            updater.get_target_filepath(url)

        else:
            # Same hostname, but different (not user-specified) port.
            Logger.warn(DIFFERENT_NETLOC_MESSAGE.format(
                netloc1=updater.configuration.network_location,
                netloc2=network_locations))
            updater = None

    # Deliberately not a bare "except:", so that KeyboardInterrupt and
    # SystemExit are not converted into a silent "no updater" answer.
    except Exception:
        Logger.exception(GENERIC_WARNING_MESSAGE.format(url=url))
        updater = None

    finally:
        if updater is None:
            Logger.warn(GENERIC_WARNING_MESSAGE.format(url=url))

    return updater
def __read_configuration(configuration_handler,
                         filename="tuf.interposition.json",
                         parent_repository_directory=None,
                         parent_ssl_certificates_directory=None):
    """
    Read TUF interposition configurations from a JSON file and pass each
    parsed configuration to a caller-supplied function.

    configuration_handler must be a function which accepts a
    tuf.interposition.Configuration instance.

    Returns the parsed configurations as a dictionary of configurations
    indexed by hostnames.  Every failure is logged and re-raised.
    """

    INVALID_TUF_CONFIGURATION = "Invalid configuration for {network_location}!"
    INVALID_TUF_INTERPOSITION_JSON = "Invalid configuration in {filename}!"
    NO_CONFIGURATIONS = "No configurations found in configuration in {filename}!"

    # hostname -> parsed configuration
    configurations_by_hostname = {}

    try:
        with open(filename) as json_file:
            document = json.load(json_file)
            raw_configurations = document.get("configurations", {})

            # Guard clause: a file without any configuration is an error.
            if len(raw_configurations) == 0:
                raise InvalidConfiguration(
                    NO_CONFIGURATIONS.format(filename=filename))

            for network_location, raw in raw_configurations.iteritems():
                try:
                    parser = ConfigurationParser(
                        network_location,
                        raw,
                        parent_repository_directory=parent_repository_directory,
                        parent_ssl_certificates_directory=parent_ssl_certificates_directory)
                    parsed = parser.parse()
                    configuration_handler(parsed)
                    configurations_by_hostname[parsed.hostname] = parsed
                except:
                    # Name the offending network location, then let the
                    # outer handler log the file name as well.
                    Logger.exception(INVALID_TUF_CONFIGURATION.format(
                        network_location=network_location))
                    raise
    except:
        Logger.exception(
            INVALID_TUF_INTERPOSITION_JSON.format(filename=filename))
        raise

    return configurations_by_hostname
def get(self, url):
    """Get an Updater, if any, for this URL.

    Assumptions:
      - @url is a string.

    Returns:
      The updater stored under the URL's hostname, provided that its
      configured network location equals either the URL's netloc or
      "hostname:port" (port defaulting to 80) and that
      updater.get_target_filepath(url) does not raise.  Otherwise None.

    Any Exception raised along the way is logged and turned into a None
    result.  KeyboardInterrupt and SystemExit are not swallowed.
    """

    GENERIC_WARNING_MESSAGE = "No updater or interposition for url={url}"
    DIFFERENT_NETLOC_MESSAGE = "We have an updater for netloc={netloc1} but not for netlocs={netloc2}"
    HOSTNAME_FOUND_MESSAGE = "Found updater for hostname={hostname}"
    HOSTNAME_NOT_FOUND_MESSAGE = "No updater for hostname={hostname}"

    updater = None

    try:
        parsed_url = urlparse.urlparse(url)
        hostname = parsed_url.hostname
        port = parsed_url.port or 80
        netloc = parsed_url.netloc
        network_location = "{hostname}:{port}".format(hostname=hostname,
                                                      port=port)

        # Sometimes parsed_url.netloc does not have a port (e.g. 80),
        # so we do a double check.
        network_locations = set((netloc, network_location))

        updater = self.__updaters.get(hostname)

        if updater is None:
            Logger.warn(HOSTNAME_NOT_FOUND_MESSAGE.format(hostname=hostname))

        # Ensure that the updater is meant for this (hostname, port).
        elif updater.configuration.network_location in network_locations:
            Logger.info(HOSTNAME_FOUND_MESSAGE.format(hostname=hostname))

            # Called only for its side effect: it raises an exception in
            # case we do not recognize how to transform this URL for TUF.
            # In that case, there will be no updater for this URL.
            updater.get_target_filepath(url)

        else:
            # Same hostname, but different (not user-specified) port.
            Logger.warn(DIFFERENT_NETLOC_MESSAGE.format(
                netloc1=updater.configuration.network_location,
                netloc2=network_locations))
            updater = None

    # Deliberately not a bare "except:", so that KeyboardInterrupt and
    # SystemExit are not converted into a silent "no updater" answer.
    except Exception:
        Logger.exception(GENERIC_WARNING_MESSAGE.format(url=url))
        updater = None

    finally:
        if updater is None:
            Logger.warn(GENERIC_WARNING_MESSAGE.format(url=url))

    return updater
def __check_configuration(self, configuration):
    """
    If the given Configuration is invalid, I raise an exception.
    Otherwise, I return some information about the Configuration,
    such as repository mirror hostnames.

    Returns:
      The set of hostnames parsed from the "url_prefix" of every repository
      mirror of the configuration.

    Raises:
      AssertionError: configuration is not a Configuration, or its hostname
        is already known as an updater or as a repository mirror.
      InvalidConfiguration: a repository mirror cannot be parsed, or its
        hostname is already known as an updater or as a repository mirror.
    """

    INVALID_REPOSITORY_MIRROR = "Invalid repository mirror {repository_mirror}!"

    # Updater has a "global" view of configurations, so it performs
    # additional checks after Configuration's own local checks.
    # The checks raise explicitly instead of using assert statements so
    # that they are not stripped when Python runs with -O.
    if not isinstance(configuration, Configuration):
        raise AssertionError("configuration must be a Configuration instance")

    # Restrict each (incoming, outgoing) hostname pair to be unique across
    # configurations; this prevents interposition cycles, amongst other
    # things.
    # GOOD: A -> { A:X, A:Y, B, ... }, C -> { D }, ...
    # BAD: A -> { B }, B -> { C }, C -> { A }, ...
    if configuration.hostname in self.__updaters:
        raise AssertionError("hostname already has an updater")
    if configuration.hostname in self.__repository_mirror_hostnames:
        raise AssertionError("hostname is already a repository mirror")

    # Parse TUF server repository mirrors.
    repository_mirrors = configuration.repository_mirrors
    repository_mirror_hostnames = set()

    for repository_mirror in repository_mirrors:
        mirror_configuration = repository_mirrors[repository_mirror]

        try:
            url_prefix = mirror_configuration["url_prefix"]
            parsed_url = urlparse.urlparse(url_prefix)
            mirror_hostname = parsed_url.hostname

            # Restrict each (incoming, outgoing) hostname pair to be unique
            # across configurations; this prevents interposition cycles,
            # amongst other things.
            if mirror_hostname in self.__updaters:
                raise AssertionError("mirror hostname already has an updater")
            if mirror_hostname in self.__repository_mirror_hostnames:
                raise AssertionError(
                    "mirror hostname is already a repository mirror")

            # Remember this mirror's hostname for the next network_location.
            repository_mirror_hostnames.add(mirror_hostname)

        except Exception:
            error_message = INVALID_REPOSITORY_MIRROR.format(
                repository_mirror=repository_mirror)
            Logger.exception(error_message)
            raise InvalidConfiguration(error_message)

    return repository_mirror_hostnames
def __read_configuration(configuration_handler,
                         filename="tuf.interposition.json",
                         parent_repository_directory=None,
                         parent_ssl_certificates_directory=None):
    """
    Read TUF interposition configurations from a JSON file and pass each
    parsed configuration to a caller-supplied function.

    configuration_handler must be a function which accepts a
    tuf.interposition.Configuration instance.

    Returns the parsed configurations as a dictionary of configurations
    indexed by hostnames.  Every failure is logged and re-raised.
    """

    INVALID_TUF_CONFIGURATION = "Invalid configuration for {network_location}!"
    INVALID_TUF_INTERPOSITION_JSON = "Invalid configuration in {filename}!"
    NO_CONFIGURATIONS = "No configurations found in configuration in {filename}!"

    # hostname -> parsed configuration
    configurations_by_hostname = {}

    try:
        with open(filename) as json_file:
            document = json.load(json_file)
            raw_configurations = document.get("configurations", {})

            # Guard clause: a file without any configuration is an error.
            if len(raw_configurations) == 0:
                raise InvalidConfiguration(
                    NO_CONFIGURATIONS.format(filename=filename))

            for network_location, raw in raw_configurations.iteritems():
                try:
                    parser = ConfigurationParser(
                        network_location,
                        raw,
                        parent_repository_directory=parent_repository_directory,
                        parent_ssl_certificates_directory=parent_ssl_certificates_directory)
                    parsed = parser.parse()
                    configuration_handler(parsed)
                    configurations_by_hostname[parsed.hostname] = parsed
                except:
                    # Name the offending network location, then let the
                    # outer handler log the file name as well.
                    Logger.exception(INVALID_TUF_CONFIGURATION.format(
                        network_location=network_location))
                    raise
    except:
        Logger.exception(
            INVALID_TUF_INTERPOSITION_JSON.format(filename=filename))
        raise

    return configurations_by_hostname
def get_target_filepath(self, source_url):
    """
    Translate a source URL into the target file path TUF should download.

    The path of source_url is tried against each single-entry
    {regex: template} mapping in self.configuration.target_paths; the first
    regex that matches wins, and its captured groups are substituted into
    the template.  Leading '/' characters are stripped from the result.

    Raises URLMatchesNoPattern when no regex matches.  Any exception is
    logged and re-raised.
    """

    WARNING_MESSAGE = ("Possibly invalid target_paths for "
                       "{network_location}! No TUF interposition for {url}")

    parsed = urlparse.urlparse(source_url)
    matched_filepath = None

    try:
        for mapping in self.configuration.target_paths:
            # mapping: { "regex_with_groups": "target_with_group_captures" }
            # e.g. { ".*(/some/directory)/$": "{0}/index.html" }
            pattern, template = list(mapping.items())[0]
            match = re.match(pattern, parsed.path)

            if match is None:
                continue

            # TODO: A failure in string formatting is *critical*.
            matched_filepath = template.format(*match.groups())

            # Order of appearance resolves ambiguity between several
            # matching expressions, so stop at the first hit.
            break

        # No regular expression matched: raise a predictable exception.
        if matched_filepath is None:
            raise URLMatchesNoPattern(source_url)

    except:
        Logger.exception(WARNING_MESSAGE.format(
            network_location=self.configuration.network_location,
            url=source_url))
        raise

    # TUF assumes that target_filepath does not begin with a '/'.
    return matched_filepath.lstrip('/')
def get_repository_mirrors(self, hostname, port, ssl_certificates):
    """Parse TUF server repository mirrors.

    Every mirror under self.configuration["repository_mirrors"] is checked:
      - an "https" url_prefix requires ssl_certificates to be an existing
        file;
      - a mirror may not have the same (hostname, port) as the given
        hostname and port;
      - no two mirrors may share the same "hostname:port".
    A url_prefix without an explicit port is treated as port 80, whatever
    its scheme.

    Returns:
      The "repository_mirrors" dictionary, unmodified.

    Raises:
      InvalidConfiguration: a mirror is malformed or fails a check above.
    """

    INVALID_REPOSITORY_MIRROR = "Invalid repository mirror {repository_mirror}!"

    repository_mirrors = self.configuration["repository_mirrors"]
    repository_mirror_network_locations = set()

    for repository_mirror in repository_mirrors:
        mirror_configuration = repository_mirrors[repository_mirror]

        try:
            url_prefix = mirror_configuration["url_prefix"]
            parsed_url = urlparse.urlparse(url_prefix)
            mirror_hostname = parsed_url.hostname
            mirror_port = parsed_url.port or 80
            mirror_scheme = parsed_url.scheme
            mirror_netloc = "{hostname}:{port}".format(
                hostname=mirror_hostname, port=mirror_port)

            # The checks below raise explicitly instead of using assert
            # statements so that they are not stripped under "python -O".

            # TODO: warn if ssl_certificates is specified,
            # but there is no mirror_scheme == "https"
            if mirror_scheme == "https" and not os.path.isfile(ssl_certificates):
                raise AssertionError("ssl_certificates is not a file")

            # No single-edge cycle in interposition.
            # GOOD: A -> { A:XYZ, ... }
            # BAD: A -> { A, ... }
            if mirror_hostname == hostname and mirror_port == port:
                raise AssertionError("mirror points back at its own source")

            # Unique network location over repository mirrors.
            # GOOD: A -> { A:X, A:Y, ... }
            # BAD: A -> { A:X, A:X, ... }
            if mirror_netloc in repository_mirror_network_locations:
                raise AssertionError("duplicate mirror network location")

            # Remember this mirror's network location to check the rest of
            # the mirrors.
            repository_mirror_network_locations.add(mirror_netloc)

        except Exception:
            error_message = INVALID_REPOSITORY_MIRROR.format(
                repository_mirror=repository_mirror)
            Logger.exception(error_message)
            raise InvalidConfiguration(error_message)

    return repository_mirrors
def get_repository_mirrors(self, hostname, port, ssl_certificates):
    """Parse TUF server repository mirrors.

    Every mirror under self.configuration["repository_mirrors"] is checked:
      - an "https" url_prefix requires ssl_certificates to be an existing
        file;
      - a mirror may not have the same (hostname, port) as the given
        hostname and port;
      - no two mirrors may share the same "hostname:port".
    A url_prefix without an explicit port is treated as port 80, whatever
    its scheme.

    Returns:
      The "repository_mirrors" dictionary, unmodified.

    Raises:
      InvalidConfiguration: a mirror is malformed or fails a check above.
    """

    INVALID_REPOSITORY_MIRROR = "Invalid repository mirror {repository_mirror}!"

    repository_mirrors = self.configuration["repository_mirrors"]
    repository_mirror_network_locations = set()

    for repository_mirror in repository_mirrors:
        mirror_configuration = repository_mirrors[repository_mirror]

        try:
            url_prefix = mirror_configuration["url_prefix"]
            parsed_url = urlparse.urlparse(url_prefix)
            mirror_hostname = parsed_url.hostname
            mirror_port = parsed_url.port or 80
            mirror_scheme = parsed_url.scheme
            mirror_netloc = "{hostname}:{port}".format(
                hostname=mirror_hostname, port=mirror_port)

            # The checks below raise explicitly instead of using assert
            # statements so that they are not stripped under "python -O".

            # TODO: warn if ssl_certificates is specified,
            # but there is no mirror_scheme == "https"
            if mirror_scheme == "https" and not os.path.isfile(ssl_certificates):
                raise AssertionError("ssl_certificates is not a file")

            # No single-edge cycle in interposition.
            # GOOD: A -> { A:XYZ, ... }
            # BAD: A -> { A, ... }
            if mirror_hostname == hostname and mirror_port == port:
                raise AssertionError("mirror points back at its own source")

            # Unique network location over repository mirrors.
            # GOOD: A -> { A:X, A:Y, ... }
            # BAD: A -> { A:X, A:X, ... }
            if mirror_netloc in repository_mirror_network_locations:
                raise AssertionError("duplicate mirror network location")

            # Remember this mirror's network location to check the rest of
            # the mirrors.
            repository_mirror_network_locations.add(mirror_netloc)

        except Exception:
            error_message = INVALID_REPOSITORY_MIRROR.format(
                repository_mirror=repository_mirror)
            Logger.exception(error_message)
            raise InvalidConfiguration(error_message)

    return repository_mirrors
def get_target_filepath(self, source_url):
    """
    Translate a source URL into the target file path TUF should download.

    The path of source_url is tried against each single-entry
    {regex: template} mapping in self.configuration.target_paths; the first
    regex that matches wins, and its captured groups are substituted into
    the template.  Leading '/' characters are stripped from the result.

    Raises URLMatchesNoPattern when no regex matches.  Any exception is
    logged and re-raised.
    """

    WARNING_MESSAGE = ("Possibly invalid target_paths for "
                       "{network_location}! No TUF interposition for {url}")

    parsed = urlparse.urlparse(source_url)
    matched_filepath = None

    try:
        for mapping in self.configuration.target_paths:
            # mapping: { "regex_with_groups": "target_with_group_captures" }
            # e.g. { ".*(/some/directory)/$": "{0}/index.html" }
            pattern, template = list(mapping.items())[0]
            match = re.match(pattern, parsed.path)

            if match is None:
                continue

            # TODO: A failure in string formatting is *critical*.
            matched_filepath = template.format(*match.groups())

            # Order of appearance resolves ambiguity between several
            # matching expressions, so stop at the first hit.
            break

        # No regular expression matched: raise a predictable exception.
        if matched_filepath is None:
            raise URLMatchesNoPattern(source_url)

    except:
        Logger.exception(WARNING_MESSAGE.format(
            network_location=self.configuration.network_location,
            url=source_url))
        raise

    # TUF assumes that target_filepath does not begin with a '/'.
    return matched_filepath.lstrip('/')
def __check_configuration_on_add(self, configuration):
    """
    If the given Configuration is invalid, I raise an exception.
    Otherwise, I return some information about the Configuration,
    such as repository mirror hostnames.

    Returns:
      Whatever configuration.get_repository_mirror_hostnames() returned.

    Raises:
      AssertionError: configuration is not a Configuration, or its hostname
        is already known as an updater or as a repository mirror.
      InvalidConfiguration: one of its repository mirror hostnames is
        already known as an updater or as a repository mirror.
    """

    INVALID_REPOSITORY_MIRROR = "Invalid repository mirror {repository_mirror}!"

    # Updater has a "global" view of configurations, so it performs
    # additional checks after Configuration's own local checks.
    # The checks raise explicitly instead of using assert statements so
    # that they are not stripped when Python runs with -O.
    if not isinstance(configuration, Configuration):
        raise AssertionError("configuration must be a Configuration instance")

    # Restrict each (incoming, outgoing) hostname pair to be unique across
    # configurations; this prevents interposition cycles, amongst other
    # things.
    # GOOD: A -> { A:X, A:Y, B, ... }, C -> { D }, ...
    # BAD: A -> { B }, B -> { C }, C -> { A }, ...
    if configuration.hostname in self.__updaters:
        raise AssertionError("hostname already has an updater")
    if configuration.hostname in self.__repository_mirror_hostnames:
        raise AssertionError("hostname is already a repository mirror")

    # Check for redundancy in server repository mirrors.
    repository_mirror_hostnames = \
        configuration.get_repository_mirror_hostnames()

    for mirror_hostname in repository_mirror_hostnames:
        try:
            # Restrict each hostname in every (incoming, outgoing) pair to be
            # unique across configurations; this prevents interposition
            # cycles, amongst other things.
            if mirror_hostname in self.__updaters:
                raise AssertionError("mirror hostname already has an updater")
            if mirror_hostname in self.__repository_mirror_hostnames:
                raise AssertionError(
                    "mirror hostname is already a repository mirror")

        except Exception:
            error_message = INVALID_REPOSITORY_MIRROR.format(
                repository_mirror=mirror_hostname)
            Logger.exception(error_message)
            raise InvalidConfiguration(error_message)

    return repository_mirror_hostnames
def __check_configuration_on_add(self, configuration):
    """
    If the given Configuration is invalid, I raise an exception.
    Otherwise, I return some information about the Configuration,
    such as repository mirror hostnames.

    Returns:
      Whatever configuration.get_repository_mirror_hostnames() returned.

    Raises:
      AssertionError: configuration is not a Configuration, or its hostname
        is already known as an updater or as a repository mirror.
      InvalidConfiguration: one of its repository mirror hostnames is
        already known as an updater or as a repository mirror.
    """

    INVALID_REPOSITORY_MIRROR = "Invalid repository mirror {repository_mirror}!"

    # Updater has a "global" view of configurations, so it performs
    # additional checks after Configuration's own local checks.
    # The checks raise explicitly instead of using assert statements so
    # that they are not stripped when Python runs with -O.
    if not isinstance(configuration, Configuration):
        raise AssertionError("configuration must be a Configuration instance")

    # Restrict each (incoming, outgoing) hostname pair to be unique across
    # configurations; this prevents interposition cycles, amongst other
    # things.
    # GOOD: A -> { A:X, A:Y, B, ... }, C -> { D }, ...
    # BAD: A -> { B }, B -> { C }, C -> { A }, ...
    if configuration.hostname in self.__updaters:
        raise AssertionError("hostname already has an updater")
    if configuration.hostname in self.__repository_mirror_hostnames:
        raise AssertionError("hostname is already a repository mirror")

    # Check for redundancy in server repository mirrors.
    repository_mirror_hostnames = \
        configuration.get_repository_mirror_hostnames()

    for mirror_hostname in repository_mirror_hostnames:
        try:
            # Restrict each hostname in every (incoming, outgoing) pair to be
            # unique across configurations; this prevents interposition
            # cycles, amongst other things.
            if mirror_hostname in self.__updaters:
                raise AssertionError("mirror hostname already has an updater")
            if mirror_hostname in self.__repository_mirror_hostnames:
                raise AssertionError(
                    "mirror hostname is already a repository mirror")

        except Exception:
            error_message = INVALID_REPOSITORY_MIRROR.format(
                repository_mirror=mirror_hostname)
            Logger.exception(error_message)
            raise InvalidConfiguration(error_message)

    return repository_mirror_hostnames
def __read_configuration(configuration_handler,
                         filename="tuf.interposition.json",
                         parent_repository_directory=None,
                         parent_ssl_certificates_directory=None):
    """
    Read a TUF interposition configuration file from disk and pass each
    parsed configuration to a caller-supplied function.

    configuration_handler must be a function which accepts a
    tuf.interposition.Configuration instance.

    Nothing is returned.  Every failure is logged and re-raised.
    """

    INVALID_TUF_CONFIGURATION = "Invalid configuration for {network_location}!"
    INVALID_TUF_INTERPOSITION_JSON = "Invalid configuration in {filename}!"
    NO_CONFIGURATIONS = "No configurations found in configuration in {filename}!"

    try:
        with open(filename) as json_file:
            document = json.load(json_file)
            raw_configurations = document.get("configurations", {})

            # Guard clause: a file without any configuration is an error.
            if len(raw_configurations) == 0:
                raise InvalidConfiguration(
                    NO_CONFIGURATIONS.format(filename=filename))

            for network_location, raw in raw_configurations.iteritems():
                try:
                    parser = ConfigurationParser(
                        network_location,
                        raw,
                        parent_repository_directory=parent_repository_directory,
                        parent_ssl_certificates_directory=parent_ssl_certificates_directory)
                    configuration_handler(parser.parse())
                except:
                    # Name the offending network location, then let the
                    # outer handler log the file name as well.
                    Logger.exception(INVALID_TUF_CONFIGURATION.format(
                        network_location=network_location))
                    raise
    except:
        Logger.exception(
            INVALID_TUF_INTERPOSITION_JSON.format(filename=filename))
        raise
def configure(filename="tuf.interposition.json",
              parent_repository_directory=None,
              parent_ssl_certificates_directory=None):
    """
    Read the TUF interposition configuration file and add every parsed
    configuration to the updater controller.

    The optional parent_repository_directory parameter names the parent
    directory containing the "repository_directory" given in a configuration.
    It applies to *all* network locations in the file, because sometimes the
    absolute location of the "repository_directory" is only known at runtime.
    If other network locations need a different parent_repository_directory,
    simply call this function again with different parameters.

    Ditto for the optional parent_ssl_certificates_directory parameter.

    Example of a TUF interposition configuration JSON object:

    {
        "configurations": {
            "seattle.cs.washington.edu": {
                "repository_directory": "client/",
                "repository_mirrors" : {
                    "mirror1": {
                        "url_prefix": "http://seattle-tuf.cs.washington.edu",
                        "metadata_path": "metadata",
                        "targets_path": "targets",
                        "confined_target_dirs": [ "" ]
                    }
                },
                ("target_paths": [
                    { ".*/(simple/\\w+)/$": "{0}/index.html" },
                    { ".*/(packages/.+)$": "{0}" }
                ],
                "ssl_certificates": "cacert.pem")
            }
        }
    }

    "target_paths" is optional: If you do not tell TUF to selectively match
    paths with regular expressions, TUF will work over any path under the
    given network location. However, if you do specify it, you are then
    telling TUF how to transform a specified path into another one, and TUF
    will *not* recognize any unspecified path for the given network location.

    Unless any "url_prefix" begins with "https://", "ssl_certificates" is
    optional; it must specify certificates bundled as PEM (RFC 1422).

    Every failure is logged and re-raised.
    """

    INVALID_TUF_CONFIGURATION = "Invalid configuration for {network_location}!"
    INVALID_TUF_INTERPOSITION_JSON = "Invalid configuration in {filename}!"
    NO_CONFIGURATIONS = "No configurations found in configuration in {filename}!"

    try:
        with open(filename) as json_file:
            document = json.load(json_file)
            raw_configurations = document.get("configurations", {})

            # Guard clause: a file without any configuration is an error.
            if len(raw_configurations) == 0:
                raise InvalidConfiguration(
                    NO_CONFIGURATIONS.format(filename=filename))

            for network_location, raw in raw_configurations.iteritems():
                try:
                    parser = ConfigurationParser(
                        network_location,
                        raw,
                        parent_repository_directory=parent_repository_directory,
                        parent_ssl_certificates_directory=parent_ssl_certificates_directory)
                    __updater_controller.add(parser.parse())
                except:
                    # Name the offending network location, then let the
                    # outer handler log the file name as well.
                    Logger.exception(INVALID_TUF_CONFIGURATION.format(
                        network_location=network_location))
                    raise
    except:
        Logger.exception(
            INVALID_TUF_INTERPOSITION_JSON.format(filename=filename))
        raise