Example #1
0
def store_missing_symbol_task(*args, **kwargs):
    """Fire-and-forget wrapper around store_missing_symbol().

    All positional and keyword arguments are forwarded untouched to
    store_missing_symbol(). Its return value (the "hash") is deliberately
    discarded, so this wrapper always returns None.

    Call store_missing_symbol() directly whenever the hash is needed, for
    example to tie a Microsoft download attempt to the record of the
    missing symbol. Use this wrapper only when it is enough to log that
    the symbol is missing, without caring about the outcome.
    """
    # Intentionally no `return`: callers of the task get nothing back.
    store_missing_symbol(*args, **kwargs)
Example #2
0
def log_symbol_get_404(symbol, debugid, filename, code_file="", code_id=""):
    """Record that a requested symbol could not be found.

    The stored records make it possible to answer "Which symbol fetches
    were recently attempted and failed?". From that we can deduce which
    symbols are commonly needed during symbolication but are not
    available, go and get hold of them, and thereby have fewer symbol
    404s in the future.

    This is expected to be called A LOT (in particular from Socorro's
    Processor), so the write has to be quick and go to a database that
    copes with many fast writes. See
    https://bugzilla.mozilla.org/show_bug.cgi?id=1361854#c5
    for the backstory about expected traffic.

    The URL used to request the file only ever contains 'symbol',
    'debugid' and 'filename'. Some services, like Socorro's stackwalker,
    know about further parameters that are relevant only to this URL;
    that is what the optional 'code_file' and 'code_id' are for.

    Returns whatever store_missing_symbol() returns when the write
    succeeds directly. Returns None when storing is disabled through
    settings.ENABLE_STORE_MISSING_SYMBOLS, or when the write was handed
    over to the background task after an OperationalError.
    """
    if not settings.ENABLE_STORE_MISSING_SYMBOLS:
        return None

    optional_fields = {"code_file": code_file, "code_id": code_id}
    try:
        return store_missing_symbol(
            symbol, debugid, filename, **optional_fields
        )
    except OperationalError:
        # Nothing is returned from this branch because the work is handed
        # to a background job, and a background job can only be
        # fire-and-forget. That is why this path is reserved for the
        # unusual case of an operational error.
        #
        # Handing it to a background task gives the write another chance.
        # It is more likely to work there because:
        #
        #   A) There is a natural delay before the task attempts the DB
        #      write. That small delay may be all that is needed to get
        #      lucky on the next attempt.
        #   B) The celery tasks have built-in support for retrying, so
        #      if the first attempt fails (which is already unlikely)
        #      there is a second chance after some deliberate sleep.
        #
        # The return value is only rarely useful anyway. It signals that
        # the record was written *just* now, which matters when we
        # attempt to automatically download the symbol from Microsoft.
        store_missing_symbol_task.delay(
            symbol, debugid, filename, **optional_fields
        )
    return None
Example #3
0
def download_microsoft_symbol(symbol,
                              debugid,
                              code_file=None,
                              code_id=None,
                              missing_symbol_hash=None):
    """Fetch a symbol from Microsoft's symbol server, convert it and upload it.

    Steps, in order:

    1. GET the compressed file (last character of ``symbol`` replaced by
       ``_``, e.g. ``foo.pdb`` -> ``foo.pd_``) from msdl.microsoft.com.
       Any status other than 200 is logged and the function returns.
    2. Look up the MissingSymbol record by ``missing_symbol_hash``. When
       no hash is supplied, store_missing_symbol() is called first to
       obtain one. A MicrosoftDownload record is created for the URL.
    3. Verify the payload starts with the ``MSCF`` cabinet signature;
       otherwise the error is saved on the download record and the
       function returns.
    4. Extract the cabinet with cabextract, run dump_syms on the
       resulting .pdb, write the output to a .sym file and hand that to
       upload_microsoft_symbol().

    Args:
        symbol: Name of the debug file, used in the URL and to derive
            the .sym filename.
        debugid: Debug ID used as the middle component of the URL.
        code_file: Optional, forwarded to store_missing_symbol().
        code_id: Optional, forwarded to store_missing_symbol().
        missing_symbol_hash: Optional hash (must be a str) of an already
            stored MissingSymbol.

    Returns:
        None in all cases.

    Raises:
        DumpSymsError: dump_syms wrote to stderr and produced no stdout.
        AssertionError: ``missing_symbol_hash`` is truthy but not a str,
            or the extracted .pdb file is not where it is expected.
    """
    MS_URL = "https://msdl.microsoft.com/download/symbols/"
    MS_USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0"
    url = MS_URL + "/".join([symbol, debugid, symbol[:-1] + "_"])
    session = requests_retry_session()
    response = session.get(url, headers={"User-Agent": MS_USER_AGENT})
    if response.status_code != 200:
        logger.info(
            f"Symbol {symbol}/{debugid} does not exist on msdl.microsoft.com")
        return

    # The fact that the file does exist on Microsoft's server means
    # we're going to download it and at least look at it.
    if not missing_symbol_hash:
        missing_symbol_hash = store_missing_symbol(
            symbol,
            debugid,
            os.path.splitext(symbol)[0] + ".sym",
            code_file=code_file,
            code_id=code_id,
        )
    else:
        assert isinstance(missing_symbol_hash, str), missing_symbol_hash
    missing_symbol = MissingSymbol.objects.get(hash=missing_symbol_hash)
    download_obj = MicrosoftDownload.objects.create(
        missing_symbol=missing_symbol, url=url)

    # Validate the cabinet signature before touching the filesystem, so
    # no temporary directory or empty file is created for a bad payload.
    content = response.content
    if not content.startswith(b"MSCF"):
        error_msg = f"Beginning of content in {url} did not start with 'MSCF'"
        logger.info(error_msg)
        download_obj.error = error_msg
        download_obj.save()
        return

    with tempfile.TemporaryDirectory() as tmpdirname:
        compressed_name = os.path.basename(url)
        filepath = os.path.join(tmpdirname, compressed_name)
        with open(filepath, "wb") as f:
            f.write(content)

        cmd = [
            settings.CABEXTRACT_PATH,
            "--quiet",
            # Important so that the extract .pdb filename is predictable.
            "--lowercase",
            "--directory",
            tmpdirname,
            filepath,
        ]
        logger.debug(" ".join(cmd))
        pipe = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        with metrics.timer("download_cabextract"):
            # cabextract's stdout is not needed; only stderr is inspected.
            _, std_err = pipe.communicate()
            if std_err:
                error_msg = f"cabextract failed for {url}. Error: {std_err!r}"
                logger.warning(error_msg)
                download_obj.error = error_msg
                download_obj.save()
                return

        # Running cabextract creates a file 'foo.pdb' from 'foo.pd_'.
        # Only the file name is lower-cased and rewritten: the temporary
        # directory part must stay exactly as the OS gave it to us, or
        # the path breaks on case-sensitive filesystems whenever the
        # temp root contains upper-case characters (or '.pd_').
        pdb_filepath = os.path.join(
            tmpdirname,
            compressed_name.lower().replace(".pd_", ".pdb"))
        assert pdb_filepath != filepath
        assert os.path.isfile(pdb_filepath), pdb_filepath
        cmd = [settings.DUMP_SYMS_PATH, pdb_filepath]
        logger.debug(" ".join(cmd))
        pipe = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        with metrics.timer("download_dump_syms"):
            std_out, std_err = pipe.communicate()
            # Note! It's expected, even if the dump_syms call works,
            # that the stderr contains something like:
            # b'Failed to find paired exe/dll file\n'
            # which is fine and can be ignored.
            if std_err and not std_out:
                error_msg = (f"dump_syms extraction failed for {url}. "
                             f"Error: {std_err!r}")
                download_obj.error = error_msg
                download_obj.save()
                raise DumpSymsError(error_msg)

        # Let's go ahead and upload it now, if it hasn't been uploaded
        # before.
        file_path = os.path.join(
            tmpdirname,
            os.path.splitext(compressed_name)[0] + ".sym")
        with open(file_path, "wb") as f:
            f.write(std_out)
        upload_microsoft_symbol(symbol, debugid, file_path, download_obj)
Example #4
0
 def fake_task(*args, **kwargs):
     """Call store_missing_symbol() directly and record the positional args.

     NOTE(review): presumably a test stand-in for the background task, so
     the work runs inline instead of being queued; confirm against the
     enclosing function, which is not visible here.
     """
     store_missing_symbol(*args, **kwargs)
     # `task_arguments` comes from the enclosing scope. Only the positional
     # arguments are recorded; keyword arguments are not.
     task_arguments.append(args)