Example #1
0
def ignore_changing_fields(clues):
    """Tries to detect and ignore MIME fields with ever changing content.

    Some servers might include fields varying with time, randomly, etc. Those
    fields are likely to alter the clue's digest and interfere with L{analyze},
    producing many false positives and making the scan useless. This function
    detects those fields and recalculates each clue's digest so they can be
    safely analyzed again.

    @param clues: Sequence of clues.
    @type clues: C{list} or C{tuple}

    @return: The same sequence, with every clue re-parsed so its digest no
    longer depends on the varying fields.
    @rtype: C{list} or C{tuple}
    """
    from Halberd.clues.Clue import Clue

    different = diff_fields(clues)

    # First alter Clue to be able to cope with the varying fields: install a
    # stub getter for each varying field so it contributes nothing to the
    # digest.  Only fields without a real getter are patched (and recorded so
    # they can be removed afterwards).
    ignored = []
    for field in different:
        method = "_get_" + Clue.normalize(field)
        if not hasattr(Clue, method):
            logger.debug("ignoring %s", field)
            ignored.append(method)
            setattr(Clue, method, lambda s, f: None)

    # Re-parse every clue so its digest is recomputed with the stubs active.
    for clue in clues:
        Clue.parse(clue, clue.headers)

    for method in ignored:
        # We want to leave the Clue class as before because a MIME field
        # causing trouble for the current scan might be the source of precious
        # information for another scan.
        delattr(Clue, method)

    return clues
Example #2
0
def ignore_changing_fields(clues):
    """Detect MIME fields whose content keeps changing and mask them out.

    Servers sometimes emit header fields that vary with time or at random.
    Such fields perturb each clue's digest and interfere with L{analyze},
    yielding many false positives.  The varying fields are temporarily
    masked and every clue is re-parsed so its digest no longer depends on
    them.

    @param clues: Sequence of clues.
    @type clues: C{list} or C{tuple}
    """
    from Halberd.clues.Clue import Clue

    varying = diff_fields(clues)

    # Install stub getters on Clue so the varying fields parse to nothing.
    patched = []
    for name in varying:
        accessor = '_get_' + Clue.normalize(name)
        if hasattr(Clue, accessor):
            continue
        logger.debug('ignoring %s', name)
        patched.append(accessor)
        setattr(Clue, accessor, lambda s, f: None)

    # Recompute every clue's digest with the stubs in place.
    for item in clues:
        Clue.parse(item, item.headers)

    # Undo the patching: a field that is noise for this scan might carry
    # precious information in another one.
    for accessor in patched:
        delattr(Clue, accessor)

    return clues
Example #3
0
def reanalyze(clues, analyzed, threshold):
    """Identify and ignore changing header fields.

    After initial analysis one must check that there aren't as many realservers
    as obtained clues. If there were it could be a sign of something wrong
    happening: each clue is different from the others due to one or more MIME
    header fields which change unexpectedly.

    @param clues: Raw sequence of clues.
    @type clues: C{list}

    @param analyzed: Result from the first analysis phase.
    @type analyzed: C{list}

    @param threshold: Minimum clue-to-realserver ratio in order to trigger
    field inspection.
    @type threshold: C{float}

    @return: Either the original C{analyzed} result or the outcome of a
    second L{analyze} pass over the filtered clues.
    @rtype: C{list}
    """

    def ratio():
        # A ratio near 1.0 means almost every clue looked like a distinct
        # server -- a strong hint that some header field keeps changing.
        return len(analyzed) / float(len(clues))

    assert len(clues) > 0

    r = ratio()
    if r >= threshold:
        logger.debug("clue-to-realserver ratio is high (%.3f)", r)
        logger.debug("reanalyzing clues...")

        ignore_changing_fields(clues)
        analyzed = analyze(clues)

        logger.debug("clue reanalysis done.")

    # Check again to see if we solved the problem but only warn the user if
    # there's a significant amount of evidence.
    if ratio() >= threshold and len(clues) > 10:
        # logger.warn is a deprecated alias of logger.warning; use the
        # canonical name.
        logger.warning(
            """The following results might be incorrect.  It could be because the remote
host keeps changing its server version string or because halberd didn't have
enough samples."""
        )

    return analyzed
Example #4
0
def reanalyze(clues, analyzed, threshold):
    """Identify and ignore changing header fields.

    When nearly every clue maps to its own "real server" the first analysis
    is suspect: one or more MIME header fields probably change on every
    response.  In that case the varying fields are discarded and the clues
    are analyzed a second time.

    @param clues: Raw sequence of clues.
    @type clues: C{list}

    @param analyzed: Result from the first analysis phase.
    @type analyzed: C{list}

    @param threshold: Minimum clue-to-realserver ratio in order to trigger
    field inspection.
    @type threshold: C{float}
    """
    assert len(clues) > 0

    def clue_server_ratio():
        return len(analyzed) / float(len(clues))

    initial = clue_server_ratio()
    if initial >= threshold:
        logger.debug('clue-to-realserver ratio is high (%.3f)', initial)
        logger.debug('reanalyzing clues...')
        ignore_changing_fields(clues)
        analyzed = analyze(clues)
        logger.debug('clue reanalysis done.')

    # The problem may persist after filtering; warn only when there is a
    # significant amount of evidence.
    if len(clues) > 10 and clue_server_ratio() >= threshold:
        logger.warn(
            '''The following results might be incorrect.  It could be because the remote
host keeps changing its server version string or because halberd didn't have
enough samples.''')

    return analyzed