def fromPcap(pcap):
    """Extract the three DGA features from the DNS traffic in a pcap file.

    Returns a tuple (collusionScore, pronScore, lexScore); all zeros when
    the pcap contains no usable DNS requests (or only empty domain names).
    """
    logger.info(lineid(), " Extracting features from pcap @ {}".format(pcap))
    requests = dns_parser.dnsRequests(pcap)
    if not requests:  # idiomatic emptiness test (was: requests == {})
        logger.info(lineid(), " No DNS Requests; defaulting to all-0 feature values")
        return 0, 0, 0
    # The response shared by the most requests hints at a single colluding server.
    mostCommonResp = collections.Counter(requests.values()).most_common(1)[0][0]
    logger.info(lineid(), " Most common DNS Response: {}".format(mostCommonResp))
    relevantDomains = sorted(k for k in requests if requests[k] == mostCommonResp)
    logger.info(lineid(), " Requests that got this response: {}".format(relevantDomains))
    collusionScore = len(relevantDomains)
    logger.info(lineid(), "Maximum domain collusion: {}".format(collusionScore))
    # Score only the 10 longest names — long names carry the most signal.
    longestRelevantDomains = sorted(relevantDomains, key=len, reverse=True)[:10]
    logger.info(lineid(), "Longest 10 relevant requests: {}".format(longestRelevantDomains))
    pronScore = lexScore = 0
    if relevantDomains:
        # Empty names would divide by zero in the normalization below; drop them.
        longestRelevantDomains = [d for d in longestRelevantDomains if d]
        if not longestRelevantDomains:
            return 0, 0, 0
        logger.info(lineid(), "Calculating pronunciation deviancy")
        pronScore = np.average(
            [gutenbergFreqtable.logProb(d) / (len(d) ** 2)
             for d in longestRelevantDomains])
        logger.info(lineid(), "Final pronunciation deviancy: {}".format(pronScore))
        logger.info(lineid(), "Calculating lexical deviancy")
        lexScore = np.average(
            [lexical_rating.lexicalDeviancy(d) for d in longestRelevantDomains])
        logger.info(lineid(), "Final lexical deviancy: {}".format(lexScore))
    return collusionScore, pronScore, lexScore
def delDictLookup(form):
    """Look up *form* in the Mongo deletions dictionary.

    Returns the stored edit distance as an int, or None when the form
    is not in the dictionary.
    """
    result = mongoconn.deletionsDictionary.find_one({"_id": form})
    if result is None:  # was a redundant second check using `== None`
        logger.info(
            lineid(),
            "DelDict lookup: Form '{}' not in deletions dictionary".format(form))
        return None
    logger.info(
        lineid(), "DelDict lookup: Form '{}', distance {}".format(form, result))
    return int(result["distance"])
def judge(x, ladder):
    """Converts a numeric value to a generic verdict in {REASONABLE,
    BORDERLINE, EXCESSIVE}, based on a ladder which specifies the relevant
    thresholds."""
    logger.info(lineid(), " Passing judgment on value: {}".format(x))
    ladderReport = ", ".join(
        ["Up to {} is {}".format(rung[0], rung[1].name) for rung in ladder])
    logger.info(lineid(), ladderReport)
    # Rungs whose upper bound covers x; ladder is assumed sorted ascending.
    cands = [rung for rung in ladder if x <= rung[0]]
    if not cands:  # was `cands == []`: the readings are off the scale
        result = ladder[-1][1]  # return highest value on scale
    else:
        result = cands[0][1]
    logger.info(lineid(), " Value is deemed {}".format(result.name))
    return result
def judge(x, ladder):
    """Converts a numeric value to a generic verdict in {REASONABLE,
    BORDERLINE, EXCESSIVE}, based on a ladder which specifies the relevant
    thresholds."""
    logger.info(lineid(), " Passing judgment on value: {}".format(x))
    report = ", ".join("Up to {} is {}".format(rung[0], rung[1].name)
                       for rung in ladder)
    logger.info(lineid(), report)
    result = None
    # Walk the ladder until a rung's upper bound covers x.
    for rung in ladder:
        if x <= rung[0]:
            result = rung[1]
            break
    if result is None:
        # The readings are off the scale: return highest value on scale.
        result = ladder[-1][1]
    logger.info(lineid(), " Value is deemed {}".format(result.name))
    return result
def delDictLookup(form):
    """Fetch the deletion distance stored for *form* in the Mongo
    deletions dictionary; returns None when the form is absent."""
    result = mongoconn.deletionsDictionary.find_one({"_id": form})
    if result is not None:
        logger.info(
            lineid(),
            "DelDict lookup: Form '{}', distance {}".format(form, result))
        return int(result["distance"])
    logger.info(
        lineid(),
        "DelDict lookup: Form '{}' not in deletions dictionary".format(form))
    # Was a second `result == None` check; a single identity test suffices.
    return None
def logProb(self, buf):
    """Total bits of surprise accumulated over every Markov transition
    (histories truncated to self.tableOrder characters) inside buf."""
    logger.info(lineid(), " Calculating surprise bits from input: {}".format(buf))
    total = 0
    for end in range(1, len(buf) + 1):
        window = buf[max(end - self.tableOrder, 0):end]
        # Also charge the shorter suffixes of each history window.
        while window:
            surprise = -log(self[window] / self[window[:-1]], 2)
            total += surprise
            logger.info(
                lineid(),
                " {} bits of surprise from transition {} => {}".format(
                    surprise, window[:-1], window))
            window = window[1:]
    logger.info(lineid(), " Total bits of surprise: {}".format(total))
    logger.info(lineid(), " (Normalized: {})".format(total / (len(buf)**2)))
    return total
def lexicalRating(word):
    """Penalty for how far *word* is from dictionary words.

    Generates all deletion derivatives of word up to distance 2, looks
    each one up in the deletions dictionary, and scores the cheapest
    total edit distance found.  Higher means less word-like.
    """
    logger.info(lineid(), "Computing lexical rating of word: {}".format(word))
    if word == "":
        return 0
    derivs = delDerivs(word, 2)
    distances = []
    for form in derivs:
        lookup = delDictLookup(form)
        if lookup is None:
            continue
        logger.info(
            lineid(),
            "\n\tInput-to-form: {}\n".format(derivs[form]) +
            "\tForm-to-dictionary: {}\n".format(lookup) +
            "\tTotal Distance: {}".format(derivs[form] + lookup))
        distances.append(derivs[form] + lookup)
    # No derivative hit the dictionary: treat the whole word as novel.
    minDistance = min(distances) if distances else len(word)
    finalPenalty = 1 + minDistance * log(len(word), 2)
    logger.info(lineid(), "Final word fragment penalty: {}".format(finalPenalty))
    return finalPenalty
def delDerivs(buf, limit=None):
    """Map every string reachable from buf by at most *limit* character
    deletions to the minimal number of deletions producing it.

    limit defaults to len(buf) + 1, i.e. every derivative including "".
    """
    if limit is None:  # was `== None`; identity test is idiomatic for None
        limit = len(buf) + 1
    result = {}
    for weight in range(limit + 1):
        for mod in itertools.combinations(range(len(buf)), weight):
            removed = set(mod)  # O(1) membership while rebuilding the string
            deriv = "".join(
                char for i, char in enumerate(buf) if i not in removed)
            # Keep the cheapest deletion count per derivative.
            if deriv not in result or result[deriv] > weight:
                result[deriv] = weight
    logger.info(lineid(), "Deletion derivatives of {}: {}".format(buf, result))
    return result
def lexicalRating(word):
    """Score how un-word-like *word* is, via deletion-distance lookups
    against the deletions dictionary."""
    logger.info(lineid(), "Computing lexical rating of word: {}".format(word))
    if word == "":
        return 0
    forms = delDerivs(word, 2)
    totals = []
    for candidate in forms:
        dictDistance = delDictLookup(candidate)
        if dictDistance is None:
            continue
        combined = forms[candidate] + dictDistance
        logger.info(
            lineid(),
            "\n\tInput-to-form: {}\n".format(forms[candidate]) +
            "\tForm-to-dictionary: {}\n".format(dictDistance) +
            "\tTotal Distance: {}".format(combined))
        totals.append(combined)
    if totals:
        minDistance = min(totals)
    else:
        minDistance = len(word)
    finalPenalty = 1 + minDistance * log(len(word), 2)
    logger.info(lineid(), "Final word fragment penalty: {}".format(finalPenalty))
    return finalPenalty
def lexicalDeviancy(buf):
    """Normalized lexical deviancy of buf: greedily strip off the prefix
    with the lowest rating-per-character, sum the prefix ratings, and
    normalize by the total input length."""
    if not buf:
        # Guard: the normalization below divides by len(buf).
        return 0
    cur = buf
    result = 0
    while cur:
        step = None
        curRating = None
        for i in range(1, len(cur) + 1):
            # Hoisted: lexicalRating(cur[:i]) was evaluated up to three
            # times per prefix (it is expensive — dictionary lookups).
            rating = lexicalRating(cur[:i])
            # Lowest rating/length ratio wins; `<=` keeps the original
            # tie-break toward longer prefixes.
            if step is None or rating * step <= curRating * i:
                step = i
                curRating = rating
        logger.info(lineid(),
                    "Greedy step: Taking away prefix {}".format(cur[:step]))
        result += curRating  # rating of cur[:step], already computed
        cur = cur[step:]
    result = result / len(buf)
    logger.info(
        lineid(),
        "Final, normalized lexical deviancy for whole input: {}".format(
            result))
    return result
def logProb(self, buf):
    """Sum of -log2 transition probabilities over all history windows of
    buf, each window truncated to self.tableOrder characters."""
    logger.info(lineid(), " Calculating surprise bits from input: {}".format(buf))
    bits = 0
    for pos in range(1, len(buf) + 1):
        context = buf[max(pos - self.tableOrder, 0):pos]
        while len(context) > 0:
            step = -log(self[context] / self[context[:-1]], 2)
            bits += step
            logger.info(
                lineid(),
                " {} bits of surprise from transition {} => {}".format(
                    step, context[:-1], context))
            # Shrink the history and charge the shorter transition too.
            context = context[1:]
    logger.info(lineid(), " Total bits of surprise: {}".format(bits))
    logger.info(lineid(), " (Normalized: {})".format(bits / (len(buf)**2)))
    return bits
def delDerivs(buf, limit=None):
    """Return {derivative: minimal deletion count} for every string obtained
    from buf by deleting at most *limit* characters (default: all of them)."""
    if limit is None:  # was `== None`; use identity comparison for None
        limit = len(buf) + 1
    result = {}
    for weight in range(limit + 1):
        for deleted in itertools.combinations(range(len(buf)), weight):
            # Build the derivative by skipping the deleted positions.
            deriv = "".join(char for i, char in enumerate(buf)
                            if i not in deleted)
            if deriv not in result or result[deriv] > weight:
                result[deriv] = weight
    logger.info(lineid(), "Deletion derivatives of {}: {}".format(buf, result))
    return result
def verdictFromSemiVerdict(v):
    """A 'semiverdict' is a mapping from the feature set to the set
    {REASONABLE, BORDERLINE, EXCESSIVE}. This function converts a semiverdict
    to a final verdict on a PCAP."""
    logger.info(
        lineid(),
        "Computing final verdict based on semiverdict:\n\
\tDomain Collusion: {}\n\
\tLexical Deviancy: {}\n\
\tPronouncability Deviancy: {}".format(v["Collusions"].name,
                                       v["Lex. Dev."].name,
                                       v["Pron. Dev."].name))
    # Collusion caps the verdict; either deviancy feature can raise it.
    worstDeviancy = max(v["Lex. Dev."].value, v["Pron. Dev."].value)
    return min(v["Collusions"].value, worstDeviancy)
def lexicalDeviancy(buf):
    """Greedy prefix decomposition of buf by best rating-per-character;
    returns the summed prefix ratings divided by len(buf)."""
    if not buf:
        # Guard against ZeroDivisionError in the normalization below.
        return 0
    cur = buf
    result = 0
    while cur:
        step = None
        curRating = None
        for i in range(1, len(cur) + 1):
            # Hoisted: this call was evaluated twice per candidate prefix
            # (and a third time after the loop) — it is expensive.
            prefixRating = lexicalRating(cur[:i])
            if step is None or prefixRating * step <= curRating * i:
                step = i
                curRating = prefixRating
        logger.info(lineid(),
                    "Greedy step: Taking away prefix {}".format(cur[:step]))
        result += curRating
        cur = cur[step:]
    result = result / len(buf)
    logger.info(
        lineid(),
        "Final, normalized lexical deviancy for whole input: {}".format(
            result))
    return result
def verdictByFeatures(sic, pron, lex):
    """Takes as input values for a PCAP's domain collusion, pronunciation
    deviancy and lexical deviancy, and outputs a tuple containing 1. the final
    verdict in {Clean, Borderline, DGA}, and 2. the original semiverdict."""
    # (removed unused local `result = {}`)
    logger.info(
        lineid(),
        "Classifying PCAP based on features:\n\
\tDomain Collusion: {}\n\
\tLexical Deviancy: {}\n\
\tPronouncability Deviancy: {}".format(sic, lex, pron))
    # Judge each feature independently against its own threshold ladder.
    semiVerdict = {
        "Collusions": judge(sic, sicThresholds),
        "Lex. Dev.": judge(lex, lexThresholds),
        "Pron. Dev.": judge(pron, pronThresholds)
    }
    return verdictFromSemiVerdict(semiVerdict), semiVerdict
def verdictByFeatures(sic, pron, lex):
    """Takes as input values for a PCAP's domain collusion, pronunciation
    deviancy and lexical deviancy, and outputs a tuple containing 1. the final
    verdict in {Clean, Borderline, DGA}, and 2. the original semiverdict."""
    # Dropped the unused local `result = {}` the original declared.
    logger.info(
        lineid(),
        "Classifying PCAP based on features:\n\
\tDomain Collusion: {}\n\
\tLexical Deviancy: {}\n\
\tPronouncability Deviancy: {}".format(sic, lex, pron))
    semiVerdict = {
        "Collusions": judge(sic, sicThresholds),
        "Lex. Dev.": judge(lex, lexThresholds),
        "Pron. Dev.": judge(pron, pronThresholds)
    }
    return verdictFromSemiVerdict(semiVerdict), semiVerdict
def verdictFromSemiVerdict(v):
    """A 'semiverdict' is a mapping from the feature set to the set
    {REASONABLE, BORDERLINE, EXCESSIVE}. This function converts a semiverdict
    to a final verdict on a PCAP."""
    logger.info(
        lineid(),
        "Computing final verdict based on semiverdict:\n\
\tDomain Collusion: {}\n\
\tLexical Deviancy: {}\n\
\tPronouncability Deviancy: {}".format(
            v["Collusions"].name, v["Lex. Dev."].name, v["Pron. Dev."].name))
    # The final verdict is the deviancy maximum, capped by the collusion score.
    deviancy = max(v["Lex. Dev."].value, v["Pron. Dev."].value)
    result = min(v["Collusions"].value, deviancy)
    return result
def verdictByFeatures(sic, pron, lex):
    """Takes as input values for a PCAP's domain collusion, pronunciation
    deviancy and lexical deviancy, and outputs a tuple containing 1. the final
    verdict in {Clean, Borderline, DGA}, and 2. the original semiverdict."""
    # (removed unused local `result = {}`)
    logger.info(
        lineid(),
        "Classifying PCAP based on features:\n\
\tDomain Collusion: {}\n\
\tLexical Deviancy: {}\n\
\tPronouncability Deviancy: {}".format(sic, lex, pron))
    semiVerdict = {
        "Collusions": judge(sic, sicThresholds),
        "Lex. Dev.": judge(lex, lexThresholds),
        "Pron. Dev.": judge(pron, pronThresholds)
    }
    return verdictFromSemiVerdict(semiVerdict), semiVerdict


if __name__ == "__main__":
    logger.info(
        lineid(),
        " Beginning analysis of traffic sample: {}".format(sys.argv[1]))
    final, semi = verdictByFeatures(*dga_features.fromPcap(sys.argv[1]))
    # The three outcomes are mutually exclusive — chain with elif.
    if final == REASONABLE.value:
        print("Probably not DGA")
    elif final == BORDERLINE.value:
        print("Possibly DGA, too close to call")
    elif final == EXCESSIVE.value:
        print("Probably DGA")
def verdictByFeatures(sic, pron, lex):
    """Takes as input values for a PCAP's domain collusion, pronunciation
    deviancy and lexical deviancy, and outputs a tuple containing 1. the final
    verdict in {Clean, Borderline, DGA}, and 2. the original semiverdict."""
    # Dropped the unused local `result = {}` the original declared.
    logger.info(
        lineid(),
        "Classifying PCAP based on features:\n\
\tDomain Collusion: {}\n\
\tLexical Deviancy: {}\n\
\tPronouncability Deviancy: {}".format(sic, lex, pron))
    semiVerdict = {
        "Collusions": judge(sic, sicThresholds),
        "Lex. Dev.": judge(lex, lexThresholds),
        "Pron. Dev.": judge(pron, pronThresholds)
    }
    return verdictFromSemiVerdict(semiVerdict), semiVerdict


if __name__ == "__main__":
    logger.info(
        lineid(),
        " Beginning analysis of traffic sample: {}".format(sys.argv[1]))
    final, semi = verdictByFeatures(*dga_features.fromPcap(sys.argv[1]))
    # Verdicts are mutually exclusive; elif avoids re-testing after a match.
    if final == REASONABLE.value:
        print("Probably not DGA")
    elif final == BORDERLINE.value:
        print("Possibly DGA, too close to call")
    elif final == EXCESSIVE.value:
        print("Probably DGA")