def createRiskHashes(malSource, googleSource, ignore_domain_list=None):
    """Fill the module-level ``malwareDomains`` and ``copyrightDomains`` dicts.

    The dicts are loaded from the cached CSV files when both caches exist;
    otherwise they are rebuilt from ``malSource`` / ``googleSource`` and the
    caches are (re)written.

    Args:
        malSource: source handed to ``createDomainsHashFromFile`` for the
            malware domains.
        googleSource: source handed to ``createDomainsHashFromFile`` for the
            copyright domains.
        ignore_domain_list: optional iterable of domains that are removed
            from both dicts after loading (used by the evaluation flow, so
            that these domains do not appear as malware/copyright domains).
            ``None`` or an empty list leaves the dicts untouched.
    """
    global malwareDomains
    global copyrightDomains
    # Clear both global dicts (they might be full from an optional prev run).
    clear_risk_hashes()
    malware_file_path = '/home/michal/malware_dict.csv'
    copyright_file_path = '/home/michal/copyright_dict.csv'
    # Only trust the cache when BOTH files are present. Checking the malware
    # file alone would try to read a copyright cache that may not exist.
    if os.path.exists(malware_file_path) and os.path.exists(copyright_file_path):
        malwareDomains = gm.readDict(malware_file_path)
        copyrightDomains = gm.readDict(copyright_file_path)
    else:
        # First time - (at least one of) the files don't exist.
        # NOTE(review): createDomainsHashFromFile presumably populates the
        # global dict named by its first argument - confirm.
        createDomainsHashFromFile('malwareDomains', malSource)
        #DEBUG: writeHashToFile('malwareDomains','/home/michal/Desktop/malwareDomainsForTest')
        gm.saveDict(malware_file_path, malwareDomains)
        createDomainsHashFromFile('copyrightDomains', googleSource)
        #DEBUG: writeHashToFile('copyrightDomains','/home/michal/Desktop/copyrightDomainsForTest')
        gm.saveDict(copyright_file_path, copyrightDomains)
    # For evaluation cases the ignore list is non-empty; the cache files on
    # disk are not modified by this filtering, only the in-memory dicts.
    for d in ignore_domain_list or ():
        malwareDomains.pop(d, None)
        copyrightDomains.pop(d, None)
    return
def create_clean_domains_file(src, dest):
    """Write the de-duplicated domains found in ``src`` to ``dest``.

    Each line of ``src`` is right-stripped and passed through
    ``trx.getDomainFromRequestedSite``; the results become the keys of a
    dict (all values ``None``), which is saved with ``gm.saveDict``.
    """
    with open(src, "r") as handle:
        # dict.fromkeys keeps one entry per distinct domain, value None.
        domains = dict.fromkeys(
            trx.getDomainFromRequestedSite(raw.rstrip()) for raw in handle
        )
    gm.saveDict(dest, domains)
def plotting_fine_tuning():
    """Re-save the pickled SALSA ``a``/``h`` dicts and plot the ``a`` values.

    Loads both objects with ``gm.read_object_from_file``, writes each one
    back out with ``gm.saveDict``, prints the largest value of the ``a``
    dict and hands that dict to ``gm.histogram_of_dict``.
    """
    a_pickle_path = '/home/michal/SALSA_files/tmp/real_run/salsa_a_dict_pickle'
    h_pickle_path = '/home/michal/SALSA_files/tmp/real_run/salsa_h_dict_pickle'

    # Load both objects before writing anything.
    a_dict = gm.read_object_from_file(a_pickle_path)
    h_dict = gm.read_object_from_file(h_pickle_path)

    gm.saveDict('/home/michal/SALSA_files/tmp/real_run/salsa_a_dict', a_dict)
    gm.saveDict('/home/michal/SALSA_files/tmp/real_run/salsa_h_dict', h_dict)

    largest = max(a_dict.values())
    print(largest)
    gm.histogram_of_dict(a_dict)
def writeGraphToFile(self, filePath):
    """Print ``self.dGraph`` as a dict of dicts and save it to ``filePath``.

    The graph is converted with ``nx.to_dict_of_dicts`` and the resulting
    dict is written with ``gm.saveDict``.
    """
    adjacency = nx.to_dict_of_dicts(self.dGraph)
    print(adjacency)
    gm.saveDict(filePath, adjacency)
def writeGraphToFile(self, filePath):
    """Print ``self.dGraph`` as a dict of dicts and save it to ``filePath``.

    The graph is converted with ``nx.to_dict_of_dicts`` and the resulting
    dict is written with ``gm.saveDict``.
    """
    adjacency = nx.to_dict_of_dicts(self.dGraph)
    print(adjacency)
    gm.saveDict(filePath, adjacency)
def writeDomainRiskDictToFile(self, fileName):
    """Save ``self.DRD`` to ``fileName`` via ``gm.saveDict``."""
    gm.saveDict(fileName, self.DRD)
def writeTransDictToFile(self, fileName):
    """Save ``self.TD`` to ``fileName`` via ``gm.saveDict``."""
    gm.saveDict(fileName, self.TD)