def store_output_files():
    data.store_data(cname_dir + cname_dict_filename, cnames)
    data.store_data(cname_dir + auth_dict_filename, auths)
    list_data.store_data(cname_dir + cname_filename, list(cname_list))
    list_data.store_data(cname_dir + auth_filename, list(auth_list))
    data.store_data(cname_dir + fail_cname_dict_filename, fail_cnames)
    data.store_data(cname_dir + fail_auth_dict_filename, fail_auths)
    list_data.store_data(cname_dir + fail_cname_filename, list(fail_cname_list))
    list_data.store_data(cname_dir + fail_auth_filename, list(fail_auth_list))
def store_output_files():
    data.store_data(output_dir + cname_dict_filename, cnames)
    data.store_data(output_dir + auth_dict_filename, auths)
    list_data.store_data(output_dir + cname_filename, list(cname_list))
    list_data.store_data(output_dir + auth_filename, list(auth_list))
    data.store_data(output_dir + fail_cname_dict_filename, fail_cnames)
    data.store_data(output_dir + fail_auth_dict_filename, fail_auths)
    list_data.store_data(output_dir + fail_cname_filename, list(fail_cname_list))
    list_data.store_data(output_dir + fail_auth_filename, list(fail_auth_list))
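# Every store_output_files() variant in this collection goes through two helper
# modules, data and list_data, whose implementations are not included here.
# The sketch below is an assumption about what they do (data.store_data pickles
# an arbitrary object, list_data.store_data writes one item per line); only the
# module/function names come from the calls above, the bodies are hypothetical.
import pickle

def store_data(path, obj):
    # data.store_data: assumed to serialize dicts/sets of results to disk
    with open(path, "wb") as fh:
        pickle.dump(obj, fh)

def load_data(path):
    # data.load_data: assumed inverse of store_data
    with open(path, "rb") as fh:
        return pickle.load(fh)

def store_lines(path, items):
    # list_data.store_data: assumed to write one string per line
    with open(path, "w") as fh:
        fh.write("\n".join(str(x) for x in items) + "\n")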
def store_output_files():
    # print "Store!"
    hostnames.update(set(list_data.load_data(hosts_dir + hostname_filename)))
    list_data.store_data(hosts_dir + hostname_filename, list(hostnames))
def store_output_files():
    data.store_data(geo_dir + geo_dict_filename, geo)
    geo_list = geo_dict_to_csv(geo)
    list_data.store_data(geo_dir + geo_filename, geo_list)
def store_output_files():
    data.store_data(geo_dir + geo_dict_filename, geo)
    geo_list = geo_dict_to_csv(geo)
    list_data.store_data(geo_dir + geo_filename, geo_list)
    data.store_data(geo_dir + exist_ip_filename, located_ips)
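# geo_dict_to_csv() is referenced above but not defined in this collection.  A
# hypothetical sketch, assuming geo maps an IP string to a location record such
# as (country, city, latitude, longitude); the exact fields are an assumption:
def geo_dict_to_csv(geo):
    rows = []
    for ip in sorted(geo):
        country, city, lat, lon = geo[ip]
        rows.append("%s,%s,%s,%s,%s" % (ip, country, city, lat, lon))
    return rows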
# Split the labelled samples into peak (label 1) and non-peak (label 0) lists,
# then write one combined training file and run svm_easy.py on it.
label = int(m.group(1))
# remains = m.group(2)
# print "label: " + str(label)
# print "remain: " + remains
if label == 0:
    nonpeak_list.append(line)
elif label == 1:
    peak_list.append(line)
# m = re.search('(\d+):(-*\d+\.*\d*e*-*\d*)( *)(.*)', remains)
# ret = m.groups()
# while len(ret) > 0:
#     remains = ret[3]
#     if remains == "":
#         break
#     m = re.search('(\d+):(-*\d+\.*\d*e*\d*-*\d*)( *)(.*)', remains)
#     ret = m.groups()
f.close()

num_peaks = len(peak_list)
num_nonpeaks = len(nonpeak_list)
print " num peak samples: %d" % (num_peaks)
print " num non-peak samples: %d" % (num_nonpeaks)

train_filename = "%srx.3.train_traces.%d.txt" % (output_dir, num_train_traces)
list_data.store_data(train_filename, peak_list + nonpeak_list)
os.system("python svm_easy.py " + train_filename)
def store_output_files():
    list_data.store_data(dns_dir + dns_filename, list(valid_dns))
    list_data.store_data(dns_dir + bad_dns_filename, list(invalid_dns))
## add to failed auths
for hostname in this_fail_auths:
    if hostname in fail_auths:
        fail_auths[hostname].update(this_fail_auths[hostname])
    else:
        fail_auths[hostname] = this_fail_auths[hostname]
fail_auth_list.update(this_fail_auth_list)

## remove tmp_dir
os.system("rm -rf %s" % (tmp_dir))
time.sleep(0.1)

## Update local files
store_output_files()
if len(nodes_ready) > 0:
    list_data.store_data(plnode_dir + ready_node_filename, nodes_ready)

if DEBUG3:
    print " Ready Nodes: "
    print " " + "\n ".join(nodes_ready)
    print " Running Nodes: "
    print " " + "\n ".join(nodes_running)
    print " Bad Nodes: "
    print " " + "\n ".join(nodes_bad)
    print " # Hostnames: %d" % (len(cnames))
    print " # CNAMEs: %d" % (len(cname_list))
    print " # Domains: %d" % (len(auths))
    print " # DNSes: %d" % (len(auth_list))
if DEBUG2: print "Arrange Jobs" njobs = math.ceil(len(cnames) * 1.0 / len(nodes)) if DEBUG3: print " %d jobs per node" % (njobs) # exit() ################### ## Generate Parameter Files ################### if DEBUG2: print "Generate Parameter Files" # nodes = ["planetlab1.ie.cuhk.edu.hk"] ## assigned DNS list_data.store_data(PARAM_FILE_AUTH, auths) ## assigned CNAME for ni in xrange(0,len(nodes)): node = nodes[ni] std = int(ni * njobs) end = int( min((ni+1)*njobs-1, len(cnames)-1) ) if DEBUG3: print("------------------------\n %d/%d: %s [%d-%d]" % (ni+1, len(nodes), node, std, end)) while std >= len(cnames): # if DEBUG3: print(" no more cnames") # break; std = int(std - len(cnames)) end = int( min(std+njobs-1, len(cnames)-1) )
if hostname in fail_auths:
    fail_auths[hostname].update(this_fail_auths[hostname])
else:
    fail_auths[hostname] = this_fail_auths[hostname]
fail_auth_list.update(this_fail_auth_list)

## remove tmp_dir
os.system("rm -rf %s" % (tmp_dir))
time.sleep(0.1)

## Update local files
store_output_files()
if len(nodes_ready) > 0:
    list_data.store_data(plnode_dir + ready_node_filename, nodes_ready)

if DEBUG3:
    print " Ready Nodes: "
    print " "+"\n ".join(nodes_ready)
    print " Running Nodes: "
    print " "+"\n ".join(nodes_running)
    print " Bad Nodes: "
    print " "+"\n ".join(nodes_bad)
    print " # Hostnames: %d" % (len(cnames))
    print " # CNAMEs: %d" % (len(cname_list))
    print " # Domains: %d" % (len(auths))
    print " # DNSes: %d" % (len(auth_list))
# For each fold, randomly sample training indices for peak and non-peak samples
# (10x more non-peak than peak), use the remaining samples for testing, and
# write the per-fold train/test files.
num_train_nonpeaks = 10 * num_train_peaks
num_test_nonpeaks = num_nonpeaks - num_train_nonpeaks

for fi in xrange(0,fold):
    train_peak_idx = sorted(random.sample(xrange(num_peaks), num_train_peaks))
    train_nonpeak_idx = sorted(random.sample(xrange(num_nonpeaks), num_train_nonpeaks))
    test_peak_idx = list(set(xrange(num_peaks)) - set(train_peak_idx))
    test_nonpeak_idx = list(set(xrange(num_nonpeaks)) - set(train_nonpeak_idx))

    train_peak_list = [peak_list[i] for i in train_peak_idx]
    train_nonpeak_list = [nonpeak_list[i] for i in train_nonpeak_idx]
    train_list = train_peak_list + train_nonpeak_list
    print " fold%d: train len=%d" % (fi, len(train_list))

    test_peak_list = [peak_list[i] for i in test_peak_idx]
    test_nonpeak_list = [nonpeak_list[i] for i in test_nonpeak_idx]
    test_list = test_peak_list + test_nonpeak_list
    print " fold%d: test len=%d" % (fi, len(test_list))

    list_data.store_data("%srx.3.all.test%d.txt" % (output_dir, fi), test_list)
    list_data.store_data("%srx.3.all.train%d.txt" % (output_dir, fi), train_list)
    # os.system("python svm_easy.py %srx.3.all.train%d.txt %srx.3.all.test%d.txt" % (output_dir, fi, output_dir, fi))
#
def store_output_files():
    data.store_data(ips_dir + ip_dict_filename, ips)
    ip_list = dict_ips_to_list(ips)
    list_data.store_data(ips_dir + ip_filename, ip_list)
###################
if DEBUG2: print "Arrange Jobs"

njobs = math.ceil(len(cnames) * 1.0 / len(nodes))
if DEBUG3: print " %d jobs per node" % (njobs)
# exit()

###################
## Generate Parameter Files
###################
if DEBUG2: print "Generate Parameter Files"

# nodes = ["planetlab1.ie.cuhk.edu.hk"]

## assigned DNS
list_data.store_data(PARAM_FILE_AUTH, auths)

## assigned CNAME
for ni in xrange(0, len(nodes)):
    node = nodes[ni]
    std = int(ni * njobs)
    end = int(min((ni + 1) * njobs - 1, len(cnames) - 1))
    if DEBUG3: print("------------------------\n %d/%d: %s [%d-%d]" % (ni + 1, len(nodes), node, std, end))

    while std >= len(cnames):
        # if DEBUG3: print(" no more cnames")
        # break;
        std = int(std - len(cnames))
        end = int(min(std + njobs - 1, len(cnames) - 1))
# num_train_nonpeaks = int(num_nonpeaks - num_test_nonpeaks)
num_train_nonpeaks = 10 * num_train_peaks
num_test_nonpeaks = num_nonpeaks - num_train_nonpeaks

for fi in xrange(0, fold):
    train_peak_idx = sorted(random.sample(xrange(num_peaks), num_train_peaks))
    train_nonpeak_idx = sorted(random.sample(xrange(num_nonpeaks), num_train_nonpeaks))
    test_peak_idx = list(set(xrange(num_peaks)) - set(train_peak_idx))
    test_nonpeak_idx = list(set(xrange(num_nonpeaks)) - set(train_nonpeak_idx))

    train_peak_list = [peak_list[i] for i in train_peak_idx]
    train_nonpeak_list = [nonpeak_list[i] for i in train_nonpeak_idx]
    train_list = train_peak_list + train_nonpeak_list
    print " fold%d: train len=%d" % (fi, len(train_list))

    test_peak_list = [peak_list[i] for i in test_peak_idx]
    test_nonpeak_list = [nonpeak_list[i] for i in test_nonpeak_idx]
    test_list = test_peak_list + test_nonpeak_list
    print " fold%d: test len=%d" % (fi, len(test_list))

    list_data.store_data("%srx.3.all.test%d.txt" % (output_dir, fi), test_list)
    list_data.store_data("%srx.3.all.train%d.txt" % (output_dir, fi), train_list)
    # os.system("python svm_easy.py %srx.3.all.train%d.txt %srx.3.all.test%d.txt" % (output_dir, fi, output_dir, fi))
#
def store_output_files():
    # ips = merge_ips(ips, data.load_data(ips_dir + ip_dict_filename))
    data.store_data(ips_dir + ip_dict_filename, ips)
    ip_list = dict_ips_to_list(ips)
    list_data.store_data(ips_dir + ip_filename, ip_list)
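# dict_ips_to_list() is called above but defined elsewhere.  Judging from the
# per-CDN loops later in this collection, ips is a nested dict of the form
# ips[cname][dns] = set of IP strings; a minimal sketch under that assumption
# (the "cname dns ip" row layout is a guess):
def dict_ips_to_list(ips):
    rows = []
    for cname in ips:
        for dns in ips[cname]:
            for ip in ips[cname][dns]:
                rows.append("%s %s %s" % (cname, dns, ip))
    return rows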
# Aggregate the unique IPs observed per CNAME, then bucket CNAMEs and IPs by
# provider (matched by regex against the CNAME) and write the per-provider counts.
for cdn in ips:
    ips_cdn = set()
    for dns in ips[cdn]:
        ips_cdn.update(ips[cdn][dns])
    num_ips_cname.append(len(ips_cdn))

    for provider in providers:
        m = re.search(provider, cdn)
        if m is not None:
            ips_provider[provider].update(ips_cdn)
            num_ips_provider[provider].append(len(ips_cdn))
            num_cnames_provider[provider].add(cdn)

list_data.store_data(output_dir + num_ips_cname_filename, map(str, num_ips_cname))

## number of cnames per provider
fh = open(output_dir + "num_cnames_per_provider.txt", 'w')
for provider in providers:
    fh.write("%d\n" % (len(num_cnames_provider[provider])))
fh.close()

## number of IPs per provider
cnt = []
for provider in providers:
    print " %s: #cnames=%d" % (provider, len(num_ips_provider[provider]))
    print " %s: #unique ips=%d" % (provider, len(ips_provider[provider]))
    list_data.store_data(output_dir + num_ips_provider_filename + "." + provider + ".txt", map(str, num_ips_provider[provider]))
def store_output_files(websites):
    list_data.store_data(output_dir + filename, list(websites))
for cdn in ips:
    ips_cdn = set()
    for dns in ips[cdn]:
        ips_cdn.update(ips[cdn][dns])
    num_ips_cname.append(len(ips_cdn))

    for provider in providers:
        m = re.search(provider, cdn)
        if m is not None:
            ips_provider[provider].update(ips_cdn)
            num_ips_provider[provider].append(len(ips_cdn))
            num_cnames_provider[provider].add(cdn)

list_data.store_data(output_dir + num_ips_cname_filename, map(str, num_ips_cname))

## number of cnames per provider
fh = open(output_dir + "num_cnames_per_provider.txt", 'w')
for provider in providers:
    fh.write("%d\n" % (len(num_cnames_provider[provider])))
fh.close()

## number of IPs per provider
cnt = []
for provider in providers:
    print " %s: #cnames=%d" % (provider, len(num_ips_provider[provider]))
    print " %s: #unique ips=%d" % (provider, len(ips_provider[provider]))
    list_data.store_data(
        output_dir + num_ips_provider_filename + "." + provider + ".txt",
        map(str, num_ips_provider[provider]))
# print "remain: " + remains if label == 0: nonpeak_list.append(line) elif label == 1: peak_list.append(line) # m = re.search('(\d+):(-*\d+\.*\d*e*-*\d*)( *)(.*)', remains) # ret = m.groups() # while len(ret) > 0: # remains = ret[3] # if remains == "": # break # m = re.search('(\d+):(-*\d+\.*\d*e*\d*-*\d*)( *)(.*)', remains) # ret = m.groups() f.close() num_peaks = len(peak_list) num_nonpeaks = len(nonpeak_list) print " num peak samples: %d" % (num_peaks) print " num non-peak samples: %d" % (num_nonpeaks) train_filename = "%srx.3.train_traces.%d.txt" % (output_dir, num_train_traces) list_data.store_data(train_filename, peak_list+nonpeak_list) os.system("python svm_easy.py " + train_filename)