Example #1
0
def fromPcap(pcap):
    """Derive the three DGA features from the DNS traffic in a pcap.

    The DNS requests come from ``dns_parser.dnsRequests(pcap)`` and are used
    as a mapping of requested domain to response.  The domains that share
    the single most common response are the "relevant" ones:

    * collusion score: how many relevant domains there are;
    * pronunciation deviancy: mean of
      ``gutenbergFreqtable.logProb(d) / len(d)**2`` over the (up to) ten
      longest non-empty relevant domains;
    * lexical deviancy: mean of ``lexical_rating.lexicalDeviancy(d)`` over
      those same domains.

    Returns the tuple ``(collusionScore, pronScore, lexScore)``, or
    ``(0, 0, 0)`` when there are no requests or when none of the selected
    domains is non-empty.
    """
    logger.info(lineid(), " Extracting features from pcap @ {}".format(pcap))
    requests = dns_parser.dnsRequests(pcap)
    if requests == {}:
        logger.info(
            lineid(),
            " No DNS Requests; defaulting to all-0 feature values")
        return 0, 0, 0

    # Pick the response value that was handed out most often.
    responseCounts = collections.Counter(requests.values())
    mostCommonResp = responseCounts.most_common(1)[0][0]
    logger.info(
        lineid(), " Most common DNS Response: {}".format(mostCommonResp))

    relevantDomains = sorted(
        domain
        for domain, response in requests.items()
        if response == mostCommonResp)
    logger.info(
        lineid(),
        " Requests that got this response: {}".format(relevantDomains))

    collusionScore = len(relevantDomains)
    logger.info(
        lineid(), "Maximum domain collusion: {}".format(collusionScore))

    longestRelevantDomains = sorted(
        relevantDomains, key=len, reverse=True)[:10]
    logger.info(
        lineid(),
        "Longest 10 relevant requests: {}".format(longestRelevantDomains))

    pronScore = 0
    lexScore = 0
    if relevantDomains:
        # Empty names would divide by zero in the normalisation below.
        scorable = [name for name in longestRelevantDomains if len(name) > 0]
        if not scorable:
            return 0, 0, 0

        logger.info(lineid(), "Calculating pronunciation deviancy")
        pronScore = np.average([
            gutenbergFreqtable.logProb(name) / (len(name) ** 2)
            for name in scorable
        ])
        logger.info(
            lineid(), "Final pronunciation deviancy: {}".format(pronScore))

        logger.info(lineid(), "Calculating lexical deviancy")
        lexScore = np.average([
            lexical_rating.lexicalDeviancy(name) for name in scorable
        ])
    logger.info(lineid(), "Final lexical deviancy: {}".format(lexScore))

    return collusionScore, pronScore, lexScore
Example #2
0
def delDictLookup(form):
    """Look up ``form`` in the deletions dictionary.

    Queries ``mongoconn.deletionsDictionary`` for the document whose ``_id``
    equals ``form`` and logs the outcome.

    Returns:
        ``None`` when no document is found; otherwise the document's
        ``"distance"`` field converted to ``int``.
    """
    result = mongoconn.deletionsDictionary.find_one({"_id": form})
    # Identity test: ``None`` is a singleton, and ``==`` could be overridden.
    if result is None:
        logger.info(
            lineid(),
            "DelDict lookup: Form '{}' not in deletions dictionary".format(
                form))
        return None
    logger.info(
        lineid(),
        "DelDict lookup: Form '{}', distance {}".format(form, result))
    return int(result["distance"])
Example #3
0
def judge(x, ladder):
    """Map the numeric value ``x`` onto a verdict using ``ladder``.

    ``ladder`` is a sequence of rungs; each rung holds a threshold at index 0
    and a verdict (an object with a ``name`` attribute) at index 1.  The
    verdict of the first rung whose threshold is at least ``x`` is returned.
    When ``x`` is above every threshold, the verdict of the last rung is
    returned instead.
    """
    logger.info(lineid(), " Passing judgment on value: {}".format(x))

    rungDescriptions = []
    for rung in ladder:
        rungDescriptions.append("Up to {} is {}".format(rung[0], rung[1].name))
    logger.info(lineid(), ", ".join(rungDescriptions))

    withinReach = [rung for rung in ladder if x <= rung[0]]
    # Off the scale: fall back to the top rung of the ladder.
    chosenRung = withinReach[0] if withinReach else ladder[-1]
    result = chosenRung[1]

    logger.info(lineid(), " Value is deemed {}".format(result.name))
    return result
Example #4
0
def judge(x, ladder):
    """Translate a number into a verdict according to a threshold ladder.

    Each entry of ``ladder`` carries a threshold at index 0 and a verdict
    (exposing ``.name``) at index 1.  The result is the verdict of the first
    entry with ``x <= threshold``; if no entry qualifies, the verdict of the
    final entry is used.
    """
    logger.info(lineid(), " Passing judgment on value: {}".format(x))

    ladderReport = ", ".join(
        "Up to {} is {}".format(entry[0], entry[1].name) for entry in ladder)
    logger.info(lineid(), ladderReport)

    matching = [entry for entry in ladder if x <= entry[0]]
    if matching:
        result = matching[0][1]
    else:
        # The reading is off the scale; use the highest entry.
        result = ladder[-1][1]

    logger.info(lineid(), " Value is deemed {}".format(result.name))
    return result
Example #5
0
def delDictLookup(form):
    """Fetch the stored distance for ``form`` from the deletions dictionary.

    The document with ``_id == form`` is requested from
    ``mongoconn.deletionsDictionary`` and the outcome is logged.

    Returns:
        ``None`` if there is no such document, else ``int`` of the
        document's ``"distance"`` field.
    """
    result = mongoconn.deletionsDictionary.find_one({"_id": form})
    # Compare to the ``None`` singleton by identity, and only once.
    if result is None:
        logger.info(
            lineid(),
            "DelDict lookup: Form '{}' not in deletions dictionary".format(
                form))
        return None
    logger.info(
        lineid(),
        "DelDict lookup: Form '{}', distance {}".format(form, result))
    return int(result["distance"])
Example #6
0
 def logProb(self, buf):
     """Return the total surprise, in bits, accumulated over ``buf``.

     For every end position ``i`` in ``buf`` the window of at most
     ``self.tableOrder`` characters ending at ``i`` is taken.  That window
     and each of its shorter suffixes contribute
     ``-log2(self[hist] / self[hist[:-1]])`` to the total.

     The un-normalised sum is returned; the value divided by
     ``len(buf)**2`` is only logged.  An empty ``buf`` yields ``0``.
     """
     logger.info(lineid(),
                 " Calculating surprise bits from input: {}".format(buf))
     result = 0
     for i in range(1, len(buf) + 1):
         hist = buf[max(i - self.tableOrder, 0):i]
         while len(hist) > 0:
             # NOTE(review): self[...] presumably returns occurrence counts;
             # a zero count would make this division or log fail - confirm.
             extra = -log(self[hist] / self[hist[:-1]], 2)
             result += extra
             logger.info(
                 lineid(),
                 " {} bits of surprise from transition {} => {}".format(
                     extra, hist[:-1], hist))
             hist = hist[1:]
     logger.info(lineid(), " Total bits of surprise: {}".format(result))
     # Skip the normalised figure for empty input: len(buf)**2 would be 0
     # and the log line itself would raise ZeroDivisionError.
     if len(buf) > 0:
         logger.info(lineid(),
                     " (Normalized: {})".format(result / (len(buf)**2)))
     return result
Example #7
0
def lexicalRating(word):
    """Compute the penalty for treating ``word`` as one lexical fragment.

    Every deletion derivative of ``word`` with up to two deletions (from
    ``delDerivs(word, 2)``) is looked up via ``delDictLookup``.  For each hit
    the deletions needed to reach the form are added to the looked-up
    distance.  The smallest such total is used, or ``len(word)`` when
    nothing was found.

    Returns ``0`` for the empty string, otherwise
    ``1 + minDistance * log2(len(word))``.
    """
    logger.info(lineid(), "Computing lexical rating of word: {}".format(word))
    if word == "":
        return 0

    distances = []
    for form, inputToForm in delDerivs(word, 2).items():
        formToDict = delDictLookup(form)
        if formToDict is not None:
            total = inputToForm + formToDict
            logger.info(
                lineid(),
                "\n\tInput-to-form: {}\n".format(inputToForm) +
                "\tForm-to-dictionary: {}\n".format(formToDict) +
                "\tTotal Distance: {}".format(total))
            distances.append(total)

    # With no dictionary hit at all, fall back to the word's length.
    minDistance = min(distances) if distances else len(word)
    finalPenalty = 1 + minDistance * log(len(word), 2)
    logger.info(lineid(),
                "Final word fragment penalty: {}".format(finalPenalty))
    return finalPenalty
Example #8
0
def delDerivs(buf, limit=None):
    """Enumerate the strings obtainable from ``buf`` by deleting characters.

    Args:
        buf: The input string.
        limit: Maximum number of characters to delete.  ``None`` (the
            default) means ``len(buf) + 1``, i.e. no effective limit.

    Returns:
        A dict mapping each derived string to the smallest number of
        deletions that produces it.
    """
    if limit is None:
        limit = len(buf) + 1
    result = {}
    # Weights are visited in ascending order, so the first time a derivative
    # appears it already carries its minimal deletion count.
    for weight in range(limit + 1):
        for mod in itertools.combinations(range(len(buf)), weight):
            dropped = set(mod)
            deriv = "".join(
                [char for i, char in enumerate(buf) if i not in dropped])
            if deriv not in result:
                result[deriv] = weight
    logger.info(lineid(), "Deletion derivatives of {}: {}".format(buf, result))
    return result
Example #9
0
def lexicalRating(word):
    """Rate how far ``word`` is from the deletions dictionary.

    The deletion derivatives of ``word`` (at most two deletions, as returned
    by ``delDerivs(word, 2)``) are each passed to ``delDictLookup``.  A
    successful lookup gives a total distance of the derivative's deletion
    count plus the looked-up value.  The minimum total is kept, defaulting
    to ``len(word)`` when no derivative is found.

    Returns ``0`` when ``word`` is the empty string, otherwise
    ``1 + minDistance * log2(len(word))``.
    """
    logger.info(lineid(), "Computing lexical rating of word: {}".format(word))
    if word == "":
        return 0

    derivs = delDerivs(word, 2)
    distances = []
    for form, deletions in derivs.items():
        lookup = delDictLookup(form)
        if lookup is None:
            continue
        combined = deletions + lookup
        report = "\n\tInput-to-form: {}\n".format(deletions)
        report += "\tForm-to-dictionary: {}\n".format(lookup)
        report += "\tTotal Distance: {}".format(combined)
        logger.info(lineid(), report)
        distances.append(combined)

    if distances:
        minDistance = min(distances)
    else:
        # No derivative was found in the dictionary.
        minDistance = len(word)
    finalPenalty = 1 + minDistance * log(len(word), 2)
    logger.info(
        lineid(), "Final word fragment penalty: {}".format(finalPenalty))
    return finalPenalty
Example #10
0
def lexicalDeviancy(buf):
    """Return the length-normalised lexical deviancy of ``buf``.

    ``buf`` is consumed greedily from the left.  At each step the prefix
    length ``i`` with the best ``lexicalRating(prefix) / i`` ratio is chosen
    (ties go to the longer prefix), its rating is added to the total, and
    the prefix is removed.  The total is finally divided by ``len(buf)``.

    An empty ``buf`` yields ``0`` instead of dividing by zero.
    """
    cur = buf
    result = 0
    while cur:
        step = None
        curRating = None
        for i in range(1, len(cur) + 1):
            # Rate each prefix once; every rating triggers dictionary
            # lookups and logging, so the value is reused below.
            rating = lexicalRating(cur[:i])
            # Cross-multiplied form of rating / i <= curRating / step.
            if step is None or rating * step <= curRating * i:
                step = i
                curRating = rating
        logger.info(lineid(),
                    "Greedy step: Taking away prefix {}".format(cur[:step]))
        result += curRating
        cur = cur[step:]
    # Guard the normalisation: len(buf) is 0 for empty input.
    result = result / len(buf) if buf else 0
    logger.info(
        lineid(),
        "Final, normalized lexical deviancy for whole input: {}".format(
            result))
    return result
Example #11
0
 def logProb(self, buf):
     """Sum the bits of surprise for every transition found in ``buf``.

     Each end position ``i`` of ``buf`` defines a window of up to
     ``self.tableOrder`` trailing characters.  The window and all of its
     shorter suffixes each add ``-log2(self[hist] / self[hist[:-1]])``.

     Returns the raw total.  The total divided by ``len(buf)**2`` is only
     written to the log.  For an empty ``buf`` the result is ``0``.
     """
     logger.info(
         lineid(), " Calculating surprise bits from input: {}".format(buf))
     result = 0
     for i in range(1, len(buf) + 1):
         hist = buf[max(i - self.tableOrder, 0):i]
         while len(hist) > 0:
             # NOTE(review): self[...] looks like a count lookup; a zero
             # entry would break the division or the log - confirm.
             extra = -log(self[hist] / self[hist[:-1]], 2)
             result += extra
             logger.info(
                 lineid(),
                 " {} bits of surprise from transition {} => {}".format(
                     extra,
                     hist[:-1],
                     hist
                 )
             )
             hist = hist[1:]
     logger.info(lineid(), " Total bits of surprise: {}".format(result))
     # An empty buf makes len(buf)**2 zero, so the normalised log line
     # would raise ZeroDivisionError; emit it only for non-empty input.
     if len(buf) > 0:
         logger.info(
             lineid(), " (Normalized: {})".format(result / (len(buf)**2)))
     return result
Example #12
0
def delDerivs(buf, limit=None):
    """Build the deletion derivatives of ``buf``.

    Args:
        buf: The string to delete characters from.
        limit: Upper bound on the number of deleted characters.  When
            ``None`` (the default), ``len(buf) + 1`` is used, which places
            no effective bound.

    Returns:
        A dict whose keys are the derived strings and whose values are the
        minimal number of deletions needed to obtain them.
    """
    if limit is None:
        limit = len(buf) + 1
    result = {}
    # Deletion counts ascend, so a derivative's first occurrence is also
    # its cheapest one and never needs to be overwritten.
    for weight in range(limit + 1):
        for mod in itertools.combinations(range(len(buf)), weight):
            removed = set(mod)
            deriv = "".join(
                [char for i, char in enumerate(buf) if i not in removed])
            if deriv not in result:
                result[deriv] = weight
    logger.info(lineid(), "Deletion derivatives of {}: {}".format(buf, result))
    return result
Example #13
0
def verdictFromSemiVerdict(v):
    """Collapse a semiverdict into one final verdict value.

    ``v`` maps the feature keys ``"Collusions"``, ``"Lex. Dev."`` and
    ``"Pron. Dev."`` to verdict objects exposing ``name`` and ``value``.
    The result is the smaller of the collusion value and the larger of the
    two deviancy values.
    """
    collusions = v["Collusions"]
    lexDev = v["Lex. Dev."]
    pronDev = v["Pron. Dev."]

    logger.info(
        lineid(),
        ("Computing final verdict based on semiverdict:\n"
         "        \tDomain Collusion: {}\n"
         "        \tLexical Deviancy: {}\n"
         "        \tPronouncability Deviancy: {}").format(
             collusions.name, lexDev.name, pronDev.name))

    worstDeviancy = max(lexDev.value, pronDev.value)
    return min(collusions.value, worstDeviancy)
Example #14
0
def lexicalDeviancy(buf):
    """Compute the lexical deviancy of ``buf``, normalised by its length.

    The input is split greedily: at every step the prefix whose
    ``lexicalRating`` per character is lowest is taken (on a tie the longer
    prefix wins), its rating is accumulated, and the remainder is processed
    the same way.  The accumulated total is divided by ``len(buf)``.

    Returns ``0`` for empty input rather than dividing by zero.
    """
    cur = buf
    result = 0
    while cur:
        step = None
        curRating = None
        for i in range(1, len(cur) + 1):
            # One rating call per prefix; each call logs and queries the
            # dictionary, so its value is kept for reuse.
            rating = lexicalRating(cur[:i])
            # Same test as rating / i <= curRating / step, without dividing.
            if step is None or rating * step <= curRating * i:
                step = i
                curRating = rating
        logger.info(
            lineid(),
            "Greedy step: Taking away prefix {}".format(cur[:step])
        )
        result += curRating
        cur = cur[step:]
    # Empty input has length 0; skip the division in that case.
    result = result / len(buf) if buf else 0
    logger.info(
        lineid(),
        "Final, normalized lexical deviancy for whole input: {}".format(
            result
        )
    )
    return result
Example #15
0
def verdictByFeatures(sic, pron, lex):
    """Classify a PCAP from its three feature values.

    ``sic`` is the domain collusion score, ``pron`` the pronunciation
    deviancy and ``lex`` the lexical deviancy.  Each one is judged against
    its own threshold ladder to form the semiverdict.

    Returns a tuple of (final verdict from ``verdictFromSemiVerdict``, the
    semiverdict dict).
    """
    logger.info(
        lineid(),
        ("Classifying PCAP based on features:\n"
         "        \tDomain Collusion: {}\n"
         "        \tLexical Deviancy: {}\n"
         "        \tPronouncability Deviancy: {}").format(sic, lex, pron))

    # Judged in the same order as the log lines above.
    semiVerdict = {}
    semiVerdict["Collusions"] = judge(sic, sicThresholds)
    semiVerdict["Lex. Dev."] = judge(lex, lexThresholds)
    semiVerdict["Pron. Dev."] = judge(pron, pronThresholds)

    finalVerdict = verdictFromSemiVerdict(semiVerdict)
    return finalVerdict, semiVerdict
Example #16
0
def verdictByFeatures(sic, pron, lex):
    """Turn a PCAP's feature values into a verdict.

    Arguments are the domain collusion score (``sic``), the pronunciation
    deviancy (``pron``) and the lexical deviancy (``lex``).  Every feature
    is passed to ``judge`` with its matching threshold ladder, and the
    resulting semiverdict is reduced by ``verdictFromSemiVerdict``.

    Returns ``(final verdict, semiverdict)``.
    """
    featureReport = (
        "Classifying PCAP based on features:\n"
        "        \tDomain Collusion: {}\n"
        "        \tLexical Deviancy: {}\n"
        "        \tPronouncability Deviancy: {}"
    )
    logger.info(lineid(), featureReport.format(sic, lex, pron))

    collusionVerdict = judge(sic, sicThresholds)
    lexVerdict = judge(lex, lexThresholds)
    pronVerdict = judge(pron, pronThresholds)
    semiVerdict = {
        "Collusions": collusionVerdict,
        "Lex. Dev.": lexVerdict,
        "Pron. Dev.": pronVerdict,
    }
    return verdictFromSemiVerdict(semiVerdict), semiVerdict
Example #17
0
def verdictFromSemiVerdict(v):
    """Reduce a semiverdict to the final verdict value for a PCAP.

    A semiverdict assigns a verdict object (with ``name`` and ``value``) to
    each of the keys ``"Collusions"``, ``"Lex. Dev."`` and ``"Pron. Dev."``.
    The final value is ``min(collusions, max(lexical, pronunciation))``,
    computed on the ``value`` attributes.
    """
    names = (
        v["Collusions"].name,
        v["Lex. Dev."].name,
        v["Pron. Dev."].name,
    )
    template = (
        "Computing final verdict based on semiverdict:\n"
        "        \tDomain Collusion: {}\n"
        "        \tLexical Deviancy: {}\n"
        "        \tPronouncability Deviancy: {}"
    )
    logger.info(lineid(), template.format(*names))

    deviancyValue = max(v["Lex. Dev."].value, v["Pron. Dev."].value)
    return min(v["Collusions"].value, deviancyValue)
Example #18
0
def verdictByFeatures(sic, pron, lex):
    """Produce a verdict for a PCAP from its feature values.

    ``sic`` (domain collusion), ``lex`` (lexical deviancy) and ``pron``
    (pronunciation deviancy) are each judged against the corresponding
    threshold ladder.  The three judgments form the semiverdict.

    Returns a 2-tuple: the result of ``verdictFromSemiVerdict`` on the
    semiverdict, and the semiverdict itself.
    """
    logger.info(
        lineid(),
        ("Classifying PCAP based on features:\n"
         "        \tDomain Collusion: {}\n"
         "        \tLexical Deviancy: {}\n"
         "        \tPronouncability Deviancy: {}").format(sic, lex, pron))

    # Keys are filled in the order collusion, lexical, pronunciation.
    semiVerdict = {}
    semiVerdict["Collusions"] = judge(sic, sicThresholds)
    semiVerdict["Lex. Dev."] = judge(lex, lexThresholds)
    semiVerdict["Pron. Dev."] = judge(pron, pronThresholds)

    return verdictFromSemiVerdict(semiVerdict), semiVerdict

if __name__ == "__main__":
    # The traffic sample to analyse is the first command-line argument.
    logger.info(
        lineid(),
        " Beginning analysis of traffic sample: {}".format(sys.argv[1]))
    features = dga_features.fromPcap(sys.argv[1])
    final, semi = verdictByFeatures(*features)
    # Each verdict level is checked independently against the final value.
    for verdict, message in (
            (REASONABLE, "Probably not DGA"),
            (BORDERLINE, "Possibly DGA, too close to call"),
            (EXCESSIVE, "Probably DGA")):
        if final == verdict.value:
            print(message)
    
Example #19
0
def verdictByFeatures(sic, pron, lex):
    """Judge each feature of a PCAP and combine the judgments.

    Parameters are the domain collusion score ``sic``, the pronunciation
    deviancy ``pron`` and the lexical deviancy ``lex``.  ``judge`` is applied
    to each with its threshold ladder, and the resulting semiverdict is
    handed to ``verdictFromSemiVerdict``.

    Returns ``(final verdict, semiverdict)``.
    """
    message = (
        "Classifying PCAP based on features:\n"
        "        \tDomain Collusion: {}\n"
        "        \tLexical Deviancy: {}\n"
        "        \tPronouncability Deviancy: {}"
    ).format(sic, lex, pron)
    logger.info(lineid(), message)

    collusionVerdict = judge(sic, sicThresholds)
    lexVerdict = judge(lex, lexThresholds)
    pronVerdict = judge(pron, pronThresholds)
    semiVerdict = {
        "Collusions": collusionVerdict,
        "Lex. Dev.": lexVerdict,
        "Pron. Dev.": pronVerdict,
    }
    finalVerdict = verdictFromSemiVerdict(semiVerdict)
    return finalVerdict, semiVerdict


if __name__ == "__main__":
    # Script entry point: analyse the pcap named by the first argument.
    logger.info(
        lineid(),
        " Beginning analysis of traffic sample: {}".format(sys.argv[1]))
    final, semi = verdictByFeatures(*dga_features.fromPcap(sys.argv[1]))
    # Verdict levels are tested one by one, in ascending severity.
    outcomes = (
        (REASONABLE, "Probably not DGA"),
        (BORDERLINE, "Possibly DGA, too close to call"),
        (EXCESSIVE, "Probably DGA"),
    )
    for level, text in outcomes:
        if final == level.value:
            print(text)