Example #1
def provider_libc_rip(hex_encoded_id, hash_type):
    # Deferred import because it's slow
    import requests

    # Build the request for the hash type
    # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml
    if hash_type == 'build_id':
        hash_type = 'buildid'
    url    = "https://libc.rip/api/find"
    params = {hash_type: hex_encoded_id}

    data = b""
    try:
        result = requests.post(url, json=params, timeout=20)
        if result.status_code != 200 or len(result.json()) == 0:
            log.warn_once("Could not find libc for %s %s on libc.rip", hash_type, hex_encoded_id)
            log.debug("Error: %s", result.text)
            return None

        libc_match = result.json()
        assert len(libc_match) == 1, 'Invalid libc.rip response.'

        url = libc_match[0]['download_url']
        log.debug("Downloading data from libc.rip: %s", url)
        data = wget(url, timeout=20)

        if not data:
            log.warn_once("Could not fetch libc for %s %s from libc.rip", hash_type, hex_encoded_id)
            return None
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc for %s %s from libc.rip: %s", hash_type, hex_encoded_id, e)
    return data
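
For comparison, here is a self-contained sketch of the same libc.rip lookup without the pwntools helpers. The endpoint and the 'download_url' response field are taken from the code above; the function name and the use of requests.get for the download step are illustrative choices of mine:

import requests

def lookup_libc_rip(hex_encoded_id, hash_type='buildid', timeout=20):
    # POST the hash to the libc.rip search API used above
    resp = requests.post('https://libc.rip/api/find',
                         json={hash_type: hex_encoded_id}, timeout=timeout)
    resp.raise_for_status()
    matches = resp.json()
    if not matches:
        return None
    # Each match carries a 'download_url' pointing at the libc binary
    return requests.get(matches[0]['download_url'], timeout=timeout).content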
Example #2
def provider_libcdb(hex_encoded_id, hash_type):
    # Deferred import because it's slow
    import requests
    from six.moves import urllib

    # Build the URL using the requested hash type
    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
    url      = urllib.parse.urljoin(url_base, hex_encoded_id)

    data     = b""
    log.debug("Downloading data from LibcDB: %s", url)
    try:
        while not data.startswith(b'\x7fELF'):
            data = wget(url, timeout=20)

            if not data:
                log.warn_once("Could not fetch libc for %s %s from libcdb", hash_type, hex_encoded_id)
                break
            
            # GitLab serves up symlinks as their raw relative target path
            if data.startswith(b'..'):
                url = os.path.dirname(url) + '/'
                url = urllib.parse.urljoin(url.encode('utf-8'), data)
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc for %s %s from libcdb: %s", hash_type, hex_encoded_id, e)
    return data
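
The symlink-following loop is the interesting part; a standalone sketch (function name invented here) that uses requests and urljoin directly instead of pwntools' wget, with a guard against looping forever on unexpected content:

import os
import requests
from urllib.parse import urljoin

def fetch_from_libcdb(hex_encoded_id, hash_type='build_id', timeout=20):
    url = 'https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/%s' % (hash_type, hex_encoded_id)
    data = b''
    while not data.startswith(b'\x7fELF'):
        resp = requests.get(url, timeout=timeout)
        if resp.status_code != 200 or not resp.content:
            return None
        data = resp.content
        if data.startswith(b'..'):
            # GitLab returned the symlink's relative target; resolve it
            url = urljoin(os.path.dirname(url) + '/', data.decode())
        elif not data.startswith(b'\x7fELF'):
            return None  # neither an ELF nor a symlink: give up
    return data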
Example #3
def _search_debuginfo_by_hash(base_url, hex_encoded_id):
    # Deferred import because it's slow
    import requests
    from six.moves import urllib

    # Check if we tried this buildid before.
    cache, cache_valid = _check_elf_cache('libcdb_dbg', hex_encoded_id, 'build_id')
    if cache_valid:
        return cache

    # Try to find separate debuginfo.
    url  = '/buildid/{}/debuginfo'.format(hex_encoded_id)
    url  = urllib.parse.urljoin(base_url, url)
    data = b""
    log.debug("Downloading data from debuginfod: %s", url)
    try:
        data = wget(url, timeout=20)
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc debuginfo for build_id %s from %s: %s", hex_encoded_id, base_url, e)
    
    # Save whatever we got to the cache
    write(cache, data or b'')

    # Return ``None`` if we did not get a valid ELF file
    if not data or not data.startswith(b'\x7FELF'):
        log.warn_once("Could not fetch libc debuginfo for build_id %s from %s", hex_encoded_id, base_url)
        return None

    return cache
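
The URL built here follows the standard debuginfod protocol, where GET /buildid/<id>/debuginfo returns the separate debug-info ELF. A minimal standalone version, using the public elfutils federated server as an example base URL:

import requests

def fetch_debuginfo(hex_build_id, base_url='https://debuginfod.elfutils.org', timeout=20):
    # debuginfod convention: /buildid/<id>/debuginfo serves the debug ELF
    url = '{}/buildid/{}/debuginfo'.format(base_url.rstrip('/'), hex_build_id)
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200 or not resp.content.startswith(b'\x7fELF'):
        return None
    return resp.content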
Example #4
def search_by_hash(hex_encoded_id, hash_type='build_id'):
    assert hash_type in HASHES, hash_type

    # Ensure that the libcdb cache directory exists
    cache_dir = os.path.join(context.cache_dir, 'libcdb', hash_type)

    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)

    # If we already downloaded the file, and it looks even passingly like
    # a valid ELF file, return it.
    cache = os.path.join(cache_dir, hex_encoded_id)

    if os.path.exists(cache):
        log.debug("Found existing cached libc at %r", cache)

        data = read(cache)
        if data.startswith(b'\x7FELF'):
            log.info_once("Using cached data from %r", cache)
            return cache
        else:
            log.info_once("Skipping unavialable libc %s", hex_encoded_id)
            return None

    # Build the URL using the requested hash type
    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
    url = urllib.parse.urljoin(url_base, hex_encoded_id)

    data = b""
    while not data.startswith(b'\x7fELF'):
        log.debug("Downloading data from LibcDB: %s", url)
        data = wget(url)

        if not data:
            log.warn_once("Could not fetch libc for build_id %s",
                          hex_encoded_id)
            break

        # GitLab serves up symlinks as their raw relative target path
        if data.startswith(b'..'):
            url = os.path.dirname(url) + '/'
            url = urllib.parse.urljoin(url.encode('utf-8'), data)

    # Save whatever we got to the cache
    write(cache, data or b'')

    # Return ``None`` if we did not get a valid ELF file
    if not data or not data.startswith(b'\x7FELF'):
        return None

    return cache
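
The cache convention is worth spelling out: a file whose contents start with the ELF magic is a hit, while an empty (or otherwise non-ELF) placeholder records a lookup that already failed. A small sketch of that check, with an invented helper name and signature mirroring the (cache, cache_valid) pair returned by _check_elf_cache in Example #3:

import os

def check_elf_cache(cache_root, hash_type, hex_encoded_id):
    # Layout used above: <cache_root>/libcdb/<hash_type>/<hex_encoded_id>
    cache = os.path.join(cache_root, 'libcdb', hash_type, hex_encoded_id)
    if not os.path.exists(cache):
        return cache, False            # nothing cached yet
    with open(cache, 'rb') as f:
        magic = f.read(4)
    # A non-ELF placeholder (written after a failed lookup) is still a valid
    # cache entry: it means "known unavailable", so cache_valid is True
    return (cache if magic == b'\x7fELF' else None), True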
Example #5
def search_by_build_id(hex_encoded_id):
    """
    Given a hex-encoded Build ID, return the path to an ELF with that Build ID,
    downloading it from libcdb if it is not already present on the local system.

    If it can't be found, return None.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded Build ID (e.g. 'ABCDEF...') of the library

    Returns:
        Path to the downloaded library on disk, or :const:`None`.
    """
    # Deferred import because it's slow
    from six.moves import urllib

    cache = cache_dir + '-libc.so.' + hex_encoded_id

    if os.path.exists(cache) and read(cache).startswith(b'\x7FELF'):
        log.info_once("Using cached data from %r", cache)
        return cache

    log.info("Downloading data from GitHub")

    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/build_id/"
    url = urllib.parse.urljoin(url_base, hex_encoded_id)

    data = ""
    while not data.startswith('\x7fELF'):
        data = wget(url)

        if not data:
            return None

        # GitLab serves up symlinks as their raw relative target path
        if data.startswith(b'..'):
            url = os.path.dirname(url) + '/'
            url = urllib.parse.urljoin(url.encode('utf-8'), data)

    write(cache, data)
    return cache
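
As a usage sketch: the hex Build ID to pass in can be read off a local binary with pwntools' ELF class, whose buildid attribute holds the raw bytes of the GNU build-ID note. The libc path below is only an example:

from pwn import ELF

libc = ELF('/lib/x86_64-linux-gnu/libc.so.6')   # example path
path = search_by_build_id(libc.buildid.hex())
if path:
    print('matching libc cached at', path)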