def provider_libc_rip(hex_encoded_id, hash_type):
    """
    Look up a libc on libc.rip by hash and download it.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded hash value to search for.
        hash_type(str):
            Name of the hash, used as the key of the search request.
            ``'build_id'`` is sent as ``'buildid'``; any other value is
            sent unchanged.

    Returns:
        The downloaded data, :const:`None` if libc.rip has no usable match
        or the download came back empty, or ``b""`` if the HTTP request
        raised a ``requests.RequestException``.
    """
    # Deferred import because it's slow
    import requests

    # Build the request for the hash type
    # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml
    if hash_type == 'build_id':
        hash_type = 'buildid'

    url = "https://libc.rip/api/find"
    params = {hash_type: hex_encoded_id}

    data = b""
    try:
        result = requests.post(url, json=params, timeout=20)

        # Parse the body once. A body that is not valid JSON is treated like
        # "no match" instead of escaping as an unhandled ValueError.
        libc_match = None
        if result.status_code == 200:
            try:
                libc_match = result.json()
            except ValueError:
                libc_match = None

        # Anything other than a non-empty list of matches is unusable.
        if not isinstance(libc_match, list) or not libc_match:
            log.warn_once("Could not find libc for %s %s on libc.rip", hash_type, hex_encoded_id)
            log.debug("Error: %s", result.text)
            return None

        # Do not validate remote data with ``assert`` (it is stripped under
        # ``-O`` and would crash otherwise); just take the first match.
        if len(libc_match) > 1:
            log.debug("Received multiple matches from libc.rip, using the first: %r", libc_match)

        url = libc_match[0]['download_url']
        log.debug("Downloading data from libc.rip: %s", url)
        data = wget(url, timeout=20)

        if not data:
            log.warn_once("Could not fetch libc for %s %s from libc.rip", hash_type, hex_encoded_id)
            return None
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc for %s %s from libc.rip: %s", hash_type, hex_encoded_id, e)

    return data
def provider_libcdb(hex_encoded_id, hash_type):
    """
    Download a libc from the LibcDB repository on GitLab by hash.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded hash value to search for.
        hash_type(str):
            Name of the hash; selects the ``hashes/<hash_type>/`` directory
            that is queried.

    Returns:
        The last data that was fetched. Callers must check for the ELF
        magic themselves: the result is empty/``None`` when a download
        failed, and may be non-ELF content when the server answered with
        something unexpected.
    """
    # Deferred import because it's slow
    import requests
    from six.moves import urllib

    # Build the URL using the requested hash type
    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
    url = urllib.parse.urljoin(url_base, hex_encoded_id)

    data = b""
    visited = set()
    log.debug("Downloading data from LibcDB: %s", url)
    try:
        while not data.startswith(b'\x7fELF'):
            # A symlink cycle would otherwise keep us downloading forever.
            if url in visited:
                log.warn_once("Could not fetch libc for %s %s from libcdb", hash_type, hex_encoded_id)
                break
            visited.add(url)

            data = wget(url, timeout=20)

            if not data:
                log.warn_once("Could not fetch libc for %s %s from libcdb", hash_type, hex_encoded_id)
                break

            # GitLab serves up symlinks as a file containing the relative
            # target path, so follow it by hand.
            if data.startswith(b'..'):
                # Keep the URL as text; mixing bytes and str breaks the
                # next hop on Python 3.
                url = os.path.dirname(url) + '/'
                url = urllib.parse.urljoin(url, data.decode('utf-8', 'replace'))
            elif not data.startswith(b'\x7fELF'):
                # Neither an ELF nor a symlink (e.g. an HTML page): asking
                # for the same URL again would loop forever.
                log.warn_once("Could not fetch libc for %s %s from libcdb", hash_type, hex_encoded_id)
                break
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc for %s %s from libcdb: %s", hash_type, hex_encoded_id, e)

    return data
def _search_debuginfo_by_hash(base_url, hex_encoded_id):
    """
    Fetch the separate debuginfo file for a Build ID from a debuginfod server.

    Arguments:
        base_url(str):
            Base URL of the server; ``/buildid/<id>/debuginfo`` is resolved
            against it.
        hex_encoded_id(str):
            Hex-encoded Build ID to look up.

    Returns:
        Path of the cache file, or :const:`None` if the download did not
        yield data starting with the ELF magic.
    """
    # Deferred import because it's slow
    import requests
    from six.moves import urllib

    # A previous lookup for this Build ID may already be cached.
    cache_path, already_cached = _check_elf_cache('libcdb_dbg', hex_encoded_id, 'build_id')
    if already_cached:
        return cache_path

    # Ask the server for the separate debuginfo of this Build ID.
    debuginfo_url = urllib.parse.urljoin(
        base_url,
        '/buildid/{}/debuginfo'.format(hex_encoded_id),
    )

    payload = b""
    log.debug("Downloading data from debuginfod: %s", debuginfo_url)
    try:
        payload = wget(debuginfo_url, timeout=20)
    except requests.RequestException as err:
        log.warn_once("Failed to fetch libc debuginfo for build_id %s from %s: %s", hex_encoded_id, base_url, err)

    # The cache file is written even when nothing usable was downloaded.
    write(cache_path, payload or b'')

    # Only data that looks like an ELF file counts as a hit.
    if payload and payload.startswith(b'\x7FELF'):
        return cache_path

    log.warn_once("Could not fetch libc debuginfo for build_id %s from %s", hex_encoded_id, base_url)
    return None
def search_by_hash(hex_encoded_id, hash_type='build_id'):
    """
    Find a libc by hash, using the local cache first and LibcDB on GitLab
    otherwise.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded hash value to search for.
        hash_type(str):
            Name of the hash; must be one of ``HASHES``.

    Returns:
        Path to the cached library on disk, or :const:`None` if no valid
        ELF file could be obtained.
    """
    assert hash_type in HASHES, hash_type

    # Ensure that the libcdb cache directory exists
    cache_dir = os.path.join(context.cache_dir, 'libcdb', hash_type)

    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)

    # If we already downloaded the file, and it looks even passingly like
    # a valid ELF file, return it.
    cache = os.path.join(cache_dir, hex_encoded_id)

    if os.path.exists(cache):
        log.debug("Found existing cached libc at %r", cache)

        data = read(cache)
        if data.startswith(b'\x7FELF'):
            log.info_once("Using cached data from %r", cache)
            return cache
        else:
            # A cache file without the ELF magic records an earlier failure.
            log.info_once("Skipping unavialable libc %s", hex_encoded_id)
            return None

    # Build the URL using the requested hash type
    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
    url = urllib.parse.urljoin(url_base, hex_encoded_id)

    data = b""
    visited = set()
    while not data.startswith(b'\x7fELF'):
        # A symlink cycle would otherwise keep us downloading forever.
        if url in visited:
            log.warn_once("Could not fetch libc for %s %s", hash_type, hex_encoded_id)
            break
        visited.add(url)

        log.debug("Downloading data from LibcDB: %s", url)
        data = wget(url)

        if not data:
            log.warn_once("Could not fetch libc for %s %s", hash_type, hex_encoded_id)
            break

        # GitLab serves up symlinks as a file containing the relative
        # target path, so follow it by hand.
        if data.startswith(b'..'):
            # Keep the URL as text; mixing bytes and str breaks the next
            # hop on Python 3.
            url = os.path.dirname(url) + '/'
            url = urllib.parse.urljoin(url, data.decode('utf-8', 'replace'))
        elif not data.startswith(b'\x7fELF'):
            # Neither an ELF nor a symlink (e.g. an HTML page): asking for
            # the same URL again would loop forever.
            log.warn_once("Could not fetch libc for %s %s", hash_type, hex_encoded_id)
            break

    # Save whatever we got to the cache
    write(cache, data or b'')

    # Return ``None`` if we did not get a valid ELF file
    if not data or not data.startswith(b'\x7FELF'):
        return None

    return cache
def search_by_build_id(hex_encoded_id):
    """
    Given a hex-encoded Build ID, return the path to an ELF with that Build ID
    on the local system, downloading it from LibcDB if it is not cached yet.
    If it can't be found, return None.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded Build ID (e.g. 'ABCDEF...') of the library

    Returns:
        Path to the downloaded library on disk, or :const:`None`.
    """
    cache = cache_dir + '-libc.so.' + hex_encoded_id

    # Bytes literals compare correctly against file contents on both
    # Python 2 and Python 3.
    if os.path.exists(cache) and read(cache).startswith(b'\x7FELF'):
        log.info_once("Using cached data from %r" % cache)
        return cache

    log.info("Downloading data from GitLab")

    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/build_id/"
    url = urlparse.urljoin(url_base, hex_encoded_id)

    data = b""
    visited = set()
    while not data.startswith(b'\x7fELF'):
        # A symlink cycle would otherwise keep us downloading forever.
        if url in visited:
            return None
        visited.add(url)

        data = wget(url)

        if not data:
            return None

        # GitLab serves up symlinks as a file containing the relative
        # target path, so follow it by hand.
        if data.startswith(b'..'):
            url = os.path.dirname(url) + '/'
            url = urlparse.urljoin(url, data.decode('utf-8', 'replace'))
        elif not data.startswith(b'\x7fELF'):
            # Neither an ELF nor a symlink (e.g. an HTML page): asking for
            # the same URL again would loop forever.
            return None

    # Only valid ELF data ever reaches the cache.
    write(cache, data)
    return cache