def store_output_files():
  """Write the CNAME and authority results to files under ``cname_dir``.

  Eight files are written, in this order: ``cnames`` and ``auths`` through
  ``data.store_data``, ``cname_list`` and ``auth_list`` (each converted with
  ``list()``) through ``list_data.store_data``, and then the four ``fail_*``
  counterparts following the same pattern.
  """
  # (writer module, file name, payload, convert payload with list() first?)
  outputs = (
    (data, cname_dict_filename, cnames, False),
    (data, auth_dict_filename, auths, False),
    (list_data, cname_filename, cname_list, True),
    (list_data, auth_filename, auth_list, True),
    (data, fail_cname_dict_filename, fail_cnames, False),
    (data, fail_auth_dict_filename, fail_auths, False),
    (list_data, fail_cname_filename, fail_cname_list, True),
    (list_data, fail_auth_filename, fail_auth_list, True),
  )
  for writer, filename, payload, as_list in outputs:
    writer.store_data(cname_dir + filename,
                      list(payload) if as_list else payload)
Example #2
0
def store_output_files():
    """Write the CNAME and authority results to files under ``output_dir``.

    ``cnames``, ``auths`` and their ``fail_*`` counterparts are passed
    unchanged to ``data.store_data``; the ``*_list`` collections are
    converted with ``list()`` and passed to ``list_data.store_data``.
    Successful results are written before the failed ones.
    """
    def _save_object(filename, payload):
        # Stored as-is through the `data` module.
        data.store_data(output_dir + filename, payload)

    def _save_list(filename, items):
        # Materialised as a list before being handed to `list_data`.
        list_data.store_data(output_dir + filename, list(items))

    _save_object(cname_dict_filename, cnames)
    _save_object(auth_dict_filename, auths)
    _save_list(cname_filename, cname_list)
    _save_list(auth_filename, auth_list)

    _save_object(fail_cname_dict_filename, fail_cnames)
    _save_object(fail_auth_dict_filename, fail_auths)
    _save_list(fail_cname_filename, fail_cname_list)
    _save_list(fail_auth_filename, fail_auth_list)
def store_output_files():
    """Fold the hostnames already on disk into ``hostnames`` and rewrite the file.

    The list stored at ``hosts_dir + hostname_filename`` is loaded, added to
    the in-memory ``hostnames`` collection (which is modified in place), and
    the combined collection is written back to the same path as a list.
    """
    hostname_path = hosts_dir + hostname_filename
    previously_stored = set(list_data.load_data(hostname_path))
    hostnames.update(previously_stored)
    list_data.store_data(hostname_path, list(hostnames))
def store_output_files():
  """Write ``geo`` to two files under ``geo_dir``.

  The object itself goes through ``data.store_data``; the result of
  ``geo_dict_to_csv(geo)`` goes through ``list_data.store_data``.
  """
  dict_path = geo_dir + geo_dict_filename
  data.store_data(dict_path, geo)

  csv_rows = geo_dict_to_csv(geo)
  csv_path = geo_dir + geo_filename
  list_data.store_data(csv_path, csv_rows)
Example #5
0
def store_output_files():
  """Write the geolocation results to three files under ``geo_dir``.

  ``geo`` is saved unchanged via ``data.store_data``, the output of
  ``geo_dict_to_csv(geo)`` via ``list_data.store_data``, and finally
  ``located_ips`` via ``data.store_data``.
  """
  dict_path = geo_dir + geo_dict_filename
  data.store_data(dict_path, geo)

  csv_rows = geo_dict_to_csv(geo)
  csv_path = geo_dir + geo_filename
  list_data.store_data(csv_path, csv_rows)

  located_path = geo_dir + exist_ip_filename
  data.store_data(located_path, located_ips)
Example #6
0
        ## Class label taken from the first group of the match object `m`
        ## (the regex and the enclosing loop are outside this fragment).
        label = int(m.group(1))
        # remains = m.group(2)
        # print "label: " + str(label)
        # print "remain: " + remains

        ## Bucket the raw line by label: 0 -> non-peak, 1 -> peak.
        ## A line carrying any other label is appended to neither list.
        if label == 0:
            nonpeak_list.append(line)
        elif label == 1:
            peak_list.append(line)

        ## (disabled) walk over the remaining "index:value" pairs of the line
        # m = re.search('(\d+):(-*\d+\.*\d*e*-*\d*)( *)(.*)', remains)
        # ret = m.groups()
        # while len(ret) > 0:
        #   remains = ret[3]
        #   if remains == "":
        #     break

        #   m = re.search('(\d+):(-*\d+\.*\d*e*\d*-*\d*)( *)(.*)', remains)
        #   ret = m.groups()
    ## NOTE(review): explicit close; where `f` is opened is not visible here --
    ## if it is not opened via `with`, an exception above leaves it open.
    f.close()

## Report how many samples ended up in each class.
num_peaks, num_nonpeaks = len(peak_list), len(nonpeak_list)
print("  num peak samples: %d" % num_peaks)
print("  num non-peak samples: %d" % num_nonpeaks)

## Write all samples (peaks first, then non-peaks) into one training file
## whose name carries num_train_traces.
train_filename = "%srx.3.train_traces.%d.txt" % (output_dir, num_train_traces)
list_data.store_data(train_filename, peak_list + nonpeak_list)

## Hand the training file to svm_easy.py through the shell.
os.system("python svm_easy.py " + train_filename)
Example #7
0
def store_output_files():
  """Write the DNS server collections to two files under ``dns_dir``.

  ``valid_dns`` goes to ``dns_filename`` and ``invalid_dns`` to
  ``bad_dns_filename``; each is converted with ``list()`` and written
  through ``list_data.store_data``.
  """
  valid_path = dns_dir + dns_filename
  list_data.store_data(valid_path, list(valid_dns))
  invalid_path = dns_dir + bad_dns_filename
  list_data.store_data(invalid_path, list(invalid_dns))
    ## add to failed auths
    ## Entries of this_fail_auths are merged into fail_auths per hostname:
    ## an existing entry is updated in place, a new hostname adopts this
    ## run's entry (the same object, not a copy).
    for hostname in this_fail_auths:
        if hostname in fail_auths:
            fail_auths[hostname].update(this_fail_auths[hostname])
        else:
            fail_auths[hostname] = this_fail_auths[hostname]

    fail_auth_list.update(this_fail_auth_list)

    ## remove tmp_dir
    ## NOTE(review): tmp_dir is interpolated unquoted into a shell command;
    ## a path containing spaces or shell metacharacters would not be removed
    ## as intended. shutil.rmtree would avoid the shell -- confirm and replace.
    os.system("rm -rf %s" % (tmp_dir))
    ## short pause after the removal (reason not visible in this fragment)
    time.sleep(0.1)

## Update local files
store_output_files()
if len(nodes_ready) > 0:
    ## the ready-node file is only rewritten when there is at least one node
    list_data.store_data(plnode_dir + ready_node_filename, nodes_ready)

if DEBUG3:
    ## node pools: a heading followed by one indented node name per line
    for _heading, _pool in (("  Ready Nodes: ", nodes_ready),
                            ("  Running Nodes: ", nodes_running),
                            ("  Bad Nodes: ", nodes_bad)):
        print(_heading)
        print("    " + "\n    ".join(_pool))

    ## sizes of the result containers collected so far
    for _fmt, _container in (("  # Hostnames: %d", cnames),
                             ("  # CNAMEs: %d", cname_list),
                             ("  # Domains:  %d", auths),
                             ("  # DNSes:  %d", auth_list)):
        print(_fmt % (len(_container)))
if DEBUG2:
  print("Arrange Jobs")

## Jobs per node = ceil(#cnames / #nodes); float() forces true division.
njobs = math.ceil(float(len(cnames)) / len(nodes))
if DEBUG3:
  print("  %d jobs per node" % njobs)
# exit()


###################
## Generate Parameter Files
###################
if DEBUG2:
  print("Generate Parameter Files")

# nodes = ["planetlab1.ie.cuhk.edu.hk"]

## assigned DNS: `auths` is written unchanged to PARAM_FILE_AUTH
list_data.store_data(PARAM_FILE_AUTH, auths)
## assigned CNAME
## Node number ni is given the index range [std, end] into cnames: njobs
## entries wide, with end clamped to the last valid index.
for ni in xrange(0,len(nodes)):
  node = nodes[ni]
  std = int(ni * njobs)
  end = int( min((ni+1)*njobs-1, len(cnames)-1) )
  if DEBUG3: print("------------------------\n  %d/%d: %s [%d-%d]" % (ni+1, len(nodes), node, std, end))

  ## A start index past the end of cnames is wrapped back by subtracting
  ## len(cnames) until it is in range; the end index is recomputed each time.
  ## NOTE(review): with an empty cnames, std (0) >= len(cnames) (0) stays
  ## true and std never changes, so this loop does not terminate -- confirm
  ## that cnames is guaranteed to be non-empty before this point.
  while std >= len(cnames):
    # if DEBUG3: print("  no more cnames")
    # break;
    std = int(std - len(cnames))
    end = int( min(std+njobs-1, len(cnames)-1) )

    ## Merge this run's entry for `hostname` into fail_auths: an existing
    ## entry is updated in place, a new hostname adopts this run's entry
    ## (the same object, not a copy). The loop header is outside this fragment.
    if hostname in fail_auths:
      fail_auths[hostname].update(this_fail_auths[hostname])
    else:
      fail_auths[hostname] = this_fail_auths[hostname]

  fail_auth_list.update(this_fail_auth_list)

  ## remove tmp_dir
  ## NOTE(review): tmp_dir is interpolated unquoted into a shell command;
  ## a path containing spaces or shell metacharacters would not be removed
  ## as intended. shutil.rmtree would avoid the shell -- confirm and replace.
  os.system("rm -rf %s" % (tmp_dir))
  ## short pause after the removal (reason not visible in this fragment)
  time.sleep(0.1)


## Update local files
store_output_files()
if len(nodes_ready) > 0:
  ## the ready-node file is only rewritten when there is at least one node
  list_data.store_data(plnode_dir + ready_node_filename, nodes_ready)


if DEBUG3:
  ## node pools: a heading followed by one indented node name per line
  for _heading, _pool in (("  Ready Nodes: ", nodes_ready),
                          ("  Running Nodes: ", nodes_running),
                          ("  Bad Nodes: ", nodes_bad)):
    print(_heading)
    print("    "+"\n    ".join(_pool))

  ## sizes of the result containers collected so far
  for _fmt, _container in (("  # Hostnames: %d", cnames),
                           ("  # CNAMEs: %d", cname_list),
                           ("  # Domains:  %d", auths),
                           ("  # DNSes:  %d", auth_list)):
    print(_fmt % (len(_container)))

Example #11
0
def store_output_files():
    """Write the DNS server collections to two files under ``dns_dir``.

    ``valid_dns`` goes to ``dns_filename`` and ``invalid_dns`` to
    ``bad_dns_filename``; each is converted with ``list()`` and written
    through ``list_data.store_data``.
    """
    outputs = (
        (dns_filename, valid_dns),
        (bad_dns_filename, invalid_dns),
    )
    for filename, servers in outputs:
        list_data.store_data(dns_dir + filename, list(servers))
Example #12
0
def store_output_files():
    """Write ``geo`` to two files under ``geo_dir``.

    The object itself goes through ``data.store_data``; the result of
    ``geo_dict_to_csv(geo)`` goes through ``list_data.store_data``.
    """
    dict_path = geo_dir + geo_dict_filename
    data.store_data(dict_path, geo)

    csv_rows = geo_dict_to_csv(geo)
    csv_path = geo_dir + geo_filename
    list_data.store_data(csv_path, csv_rows)
## Train on ten non-peak samples per training peak; whatever remains of the
## non-peaks is the test share.
## NOTE(review): random.sample below raises ValueError when
## num_train_nonpeaks exceeds num_nonpeaks -- confirm the data always holds
## enough non-peak samples.
num_train_nonpeaks = num_train_peaks * 10
num_test_nonpeaks = num_nonpeaks - num_train_nonpeaks


for fi in xrange(fold):

  ## Draw the training indices without replacement (peaks first, so the
  ## random stream is consumed in the same order), then sort them in place.
  train_peak_idx = random.sample(xrange(num_peaks), num_train_peaks)
  train_peak_idx.sort()
  train_nonpeak_idx = random.sample(xrange(num_nonpeaks), num_train_nonpeaks)
  train_nonpeak_idx.sort()

  ## Every index that was not drawn for training is used for testing.
  test_peak_idx = list(set(xrange(num_peaks)) - set(train_peak_idx))
  test_nonpeak_idx = list(set(xrange(num_nonpeaks)) - set(train_nonpeak_idx))

  ## Training samples: peaks followed by non-peaks.
  train_peak_list = [peak_list[i] for i in train_peak_idx]
  train_nonpeak_list = [nonpeak_list[i] for i in train_nonpeak_idx]
  train_list = train_peak_list + train_nonpeak_list
  print("    fold%d: train len=%d" % (fi, len(train_list)))

  ## Test samples: peaks followed by non-peaks.
  test_peak_list = [peak_list[i] for i in test_peak_idx]
  test_nonpeak_list = [nonpeak_list[i] for i in test_nonpeak_idx]
  test_list = test_peak_list + test_nonpeak_list
  print("    fold%d: test len=%d" % (fi, len(test_list)))


  ## One test file and one train file per fold, numbered by fi.
  list_data.store_data("%srx.3.all.test%d.txt" % (output_dir, fi), test_list)
  list_data.store_data("%srx.3.all.train%d.txt" % (output_dir, fi), train_list)


  # os.system("python svm_easy.py %srx.3.all.train%d.txt %srx.3.all.test%d.txt" % (output_dir, fi, output_dir, fi))

#
def store_output_files():
    """Write ``ips`` to two files under ``ips_dir``.

    The object itself goes through ``data.store_data``; the result of
    ``dict_ips_to_list(ips)`` goes through ``list_data.store_data``.
    """
    dict_path = ips_dir + ip_dict_filename
    data.store_data(dict_path, ips)

    flattened = dict_ips_to_list(ips)
    list_path = ips_dir + ip_filename
    list_data.store_data(list_path, flattened)
###################
if DEBUG2:
    print("Arrange Jobs")

## Jobs per node = ceil(#cnames / #nodes); float() forces true division.
njobs = math.ceil(float(len(cnames)) / len(nodes))
if DEBUG3:
    print("  %d jobs per node" % njobs)
# exit()

###################
## Generate Parameter Files
###################
if DEBUG2:
    print("Generate Parameter Files")

# nodes = ["planetlab1.ie.cuhk.edu.hk"]

## assigned DNS: `auths` is written unchanged to PARAM_FILE_AUTH
list_data.store_data(PARAM_FILE_AUTH, auths)

## assigned CNAME
## Node number ni is given the index range [std, end] into cnames: njobs
## entries wide, with end clamped to the last valid index.
for ni in xrange(0, len(nodes)):
    node = nodes[ni]
    std = int(ni * njobs)
    end = int(min((ni + 1) * njobs - 1, len(cnames) - 1))
    if DEBUG3:
        print("------------------------\n  %d/%d: %s [%d-%d]" %
              (ni + 1, len(nodes), node, std, end))

    ## A start index past the end of cnames is wrapped back by subtracting
    ## len(cnames) until it is in range; the end index is recomputed each time.
    ## NOTE(review): with an empty cnames, std (0) >= len(cnames) (0) stays
    ## true and std never changes, so this loop does not terminate -- confirm
    ## that cnames is guaranteed to be non-empty before this point.
    while std >= len(cnames):
        # if DEBUG3: print("  no more cnames")
        # break;
        std = int(std - len(cnames))
        end = int(min(std + njobs - 1, len(cnames) - 1))
## Train on ten non-peak samples per training peak; whatever remains of the
## non-peaks is the test share.
## NOTE(review): random.sample below raises ValueError when
## num_train_nonpeaks exceeds num_nonpeaks -- confirm the data always holds
## enough non-peak samples.
num_train_nonpeaks = num_train_peaks * 10
num_test_nonpeaks = num_nonpeaks - num_train_nonpeaks

for fi in xrange(fold):

    ## Draw the training indices without replacement (peaks first, so the
    ## random stream is consumed in the same order), then sort them in place.
    train_peak_idx = random.sample(xrange(num_peaks), num_train_peaks)
    train_peak_idx.sort()
    train_nonpeak_idx = random.sample(xrange(num_nonpeaks),
                                      num_train_nonpeaks)
    train_nonpeak_idx.sort()

    ## Every index that was not drawn for training is used for testing.
    test_peak_idx = list(set(xrange(num_peaks)) - set(train_peak_idx))
    test_nonpeak_idx = list(set(xrange(num_nonpeaks)) - set(train_nonpeak_idx))

    ## Training samples: peaks followed by non-peaks.
    train_peak_list = [peak_list[i] for i in train_peak_idx]
    train_nonpeak_list = [nonpeak_list[i] for i in train_nonpeak_idx]
    train_list = train_peak_list + train_nonpeak_list
    print("    fold%d: train len=%d" % (fi, len(train_list)))

    ## Test samples: peaks followed by non-peaks.
    test_peak_list = [peak_list[i] for i in test_peak_idx]
    test_nonpeak_list = [nonpeak_list[i] for i in test_nonpeak_idx]
    test_list = test_peak_list + test_nonpeak_list
    print("    fold%d: test len=%d" % (fi, len(test_list)))

    ## One test file and one train file per fold, numbered by fi.
    list_data.store_data("%srx.3.all.test%d.txt" % (output_dir, fi), test_list)
    list_data.store_data("%srx.3.all.train%d.txt" % (output_dir, fi),
                         train_list)

    # os.system("python svm_easy.py %srx.3.all.train%d.txt %srx.3.all.test%d.txt" % (output_dir, fi, output_dir, fi))

#
def store_output_files():
  """Write ``ips`` to two files under ``ips_dir``.

  The object itself goes through ``data.store_data``; the result of
  ``dict_ips_to_list(ips)`` goes through ``list_data.store_data``.
  Merging with the dictionary already on disk is currently disabled.
  """
  # ips = merge_ips(ips, data.load_data(ips_dir + ip_dict_filename))
  dict_path = ips_dir + ip_dict_filename
  data.store_data(dict_path, ips)

  flattened = dict_ips_to_list(ips)
  list_path = ips_dir + ip_filename
  list_data.store_data(list_path, flattened)
def store_output_files():
    """Fold the hostnames already on disk into ``hostnames`` and rewrite the file.

    The list stored at ``hosts_dir + hostname_filename`` is loaded, added to
    the in-memory ``hostnames`` collection (which is modified in place), and
    the combined collection is written back to the same path as a list.
    """
    hostname_path = hosts_dir + hostname_filename
    previously_stored = set(list_data.load_data(hostname_path))
    hostnames.update(previously_stored)
    list_data.store_data(hostname_path, list(hostnames))
## Per-CNAME and per-provider IP statistics.
## `ips` is walked as ips[cname][dns] -> iterable of IPs; every provider
## pattern that matches a CNAME is credited with that CNAME's IPs.
for cdn in ips:
  ## distinct IPs seen for this CNAME across all of its DNS entries
  ips_cdn = set()
  for dns in ips[cdn]:
    ips_cdn.update(ips[cdn][dns])

  num_ips_cname.append(len(ips_cdn))

  for provider in providers:
    ## provider names are applied as regular expressions via re.search, so
    ## a match anywhere inside the CNAME counts
    m = re.search(provider, cdn)
    if m is not None:
      ips_provider[provider].update(ips_cdn)
      num_ips_provider[provider].append(len(ips_cdn))
      num_cnames_provider[provider].add(cdn)

## one IP count per CNAME, stringified for the list writer
list_data.store_data(output_dir + num_ips_cname_filename, map(str, num_ips_cname))


## number of cnames per provider (one count per line, in `providers` order)
## The context manager closes the file even when a write raises, which the
## previous open()/close() pair did not guarantee.
with open(output_dir + "num_cnames_per_provider.txt", 'w') as fh:
  for provider in providers:
    fh.write("%d\n" % (len(num_cnames_provider[provider])))


## number of IPs per provider
cnt = []
for provider in providers:
  print("  %s: #cnames=%d" % (provider, len(num_ips_provider[provider])))
  print("  %s: #unique ips=%d" % (provider, len(ips_provider[provider])))
  ## one file per provider holding the per-CNAME IP counts as strings
  list_data.store_data(output_dir + num_ips_provider_filename + "." + provider + ".txt", map(str, num_ips_provider[provider]))
def store_output_files():
  """Write ``ips`` to two files under ``ips_dir``.

  The object itself goes through ``data.store_data``; the result of
  ``dict_ips_to_list(ips)`` goes through ``list_data.store_data``.
  """
  dict_path = ips_dir + ip_dict_filename
  data.store_data(dict_path, ips)

  flattened = dict_ips_to_list(ips)
  list_path = ips_dir + ip_filename
  list_data.store_data(list_path, flattened)
Example #21
0
def store_output_files(websites):
  """Write ``websites`` as a list to ``output_dir + filename``.

  Args:
    websites: iterable of entries to store; it is converted with
      ``list()`` before being passed to ``list_data.store_data``.
  """
  target_path = output_dir + filename
  entries = list(websites)
  list_data.store_data(target_path, entries)
Example #22
0
## Per-CNAME and per-provider IP statistics.
## `ips` is walked as ips[cname][dns] -> iterable of IPs; every provider
## pattern that matches a CNAME is credited with that CNAME's IPs.
for cdn in ips:
    ## distinct IPs seen for this CNAME across all of its DNS entries
    ips_cdn = set()
    for dns in ips[cdn]:
        ips_cdn.update(ips[cdn][dns])

    num_ips_cname.append(len(ips_cdn))

    for provider in providers:
        ## provider names are applied as regular expressions via re.search,
        ## so a match anywhere inside the CNAME counts
        m = re.search(provider, cdn)
        if m is not None:
            ips_provider[provider].update(ips_cdn)
            num_ips_provider[provider].append(len(ips_cdn))
            num_cnames_provider[provider].add(cdn)

## one IP count per CNAME, stringified for the list writer
list_data.store_data(output_dir + num_ips_cname_filename,
                     map(str, num_ips_cname))

## number of cnames per provider (one count per line, in `providers` order)
## The context manager closes the file even when a write raises, which the
## previous open()/close() pair did not guarantee.
with open(output_dir + "num_cnames_per_provider.txt", 'w') as fh:
    for provider in providers:
        fh.write("%d\n" % (len(num_cnames_provider[provider])))

## number of IPs per provider
cnt = []
for provider in providers:
    print("  %s: #cnames=%d" % (provider, len(num_ips_provider[provider])))
    print("  %s: #unique ips=%d" % (provider, len(ips_provider[provider])))
    ## one file per provider holding the per-CNAME IP counts as strings
    list_data.store_data(
        output_dir + num_ips_provider_filename + "." + provider + ".txt",
        map(str, num_ips_provider[provider]))
    # print "remain: " + remains

    ## Bucket the raw line by label: 0 -> non-peak, 1 -> peak.
    ## A line carrying any other label is appended to neither list.
    ## (`label` and `line` are set outside this fragment.)
    if label == 0:
      nonpeak_list.append(line)
    elif label == 1:
      peak_list.append(line)

    ## (disabled) walk over the remaining "index:value" pairs of the line
    # m = re.search('(\d+):(-*\d+\.*\d*e*-*\d*)( *)(.*)', remains)
    # ret = m.groups()
    # while len(ret) > 0:
    #   remains = ret[3]
    #   if remains == "":
    #     break

    #   m = re.search('(\d+):(-*\d+\.*\d*e*\d*-*\d*)( *)(.*)', remains)
    #   ret = m.groups()
  ## NOTE(review): explicit close; where `f` is opened is not visible here --
  ## if it is not opened via `with`, an exception above leaves it open.
  f.close()


## Report how many samples ended up in each class.
num_peaks, num_nonpeaks = len(peak_list), len(nonpeak_list)
print("  num peak samples: %d" % num_peaks)
print("  num non-peak samples: %d" % num_nonpeaks)


## Write all samples (peaks first, then non-peaks) into one training file
## whose name carries num_train_traces.
train_filename = "%srx.3.train_traces.%d.txt" % (output_dir, num_train_traces)
list_data.store_data(train_filename, peak_list+nonpeak_list)

## Hand the training file to svm_easy.py through the shell.
os.system("python svm_easy.py " + train_filename)