Ejemplo n.º 1
0
# Script section: read a graph file path from the command line, prepare a
# scratch directory next to it, ask gmetrics_lib for the top-k central nodes,
# record the initial largest-component ratio, then iterate over those nodes.
#
# Earlier hard-coded inputs, left commented out for reference.
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'evt_logs'
#test_graph = '/pic/projects/mnms4graphs/eventlogs/tsv/plogon20120429.dmp.gr.tsv'
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'nccdc'
#test_graph = '/pic/projects/mnms4graphs/nccdc/2013/tsv/win3600/nccdc2013_win3600_12.tsv'
#gmetrics_lib.get_graph_size(test_graph)

#sys.exit(1)

# The graph to analyse is the first command-line argument; fail with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    print('USAGE:')
    print(' ' + sys.argv[0] + ' <path to graph tsv file>')
    sys.exit(1)

test_graph = sys.argv[1]

# Dataset label: the file name with a trailing '.tsv' removed. Only the
# suffix is stripped, so a '.tsv' appearing earlier in the name is preserved.
data_src = os.path.basename(test_graph)
if data_src.endswith('.tsv'):
    data_src = data_src[:-len('.tsv')]

# Scratch directory beside the input graph. os.path.join keeps the result
# relative when the graph is given as a bare file name (empty dirname);
# plain concatenation would have produced '/tmp-graphs/' at the filesystem
# root. The trailing '' component keeps the trailing separator.
test_graph_dir = os.path.dirname(test_graph)
tmp_graphs_dir = os.path.join(test_graph_dir, 'tmp-graphs', '')
gmetrics_lib.mkdir_p(tmp_graphs_dir)
tmp_graph_prefix = tmp_graphs_dir + 'tmp_graph'

# get_k is defined elsewhere in the file; it chooses k for this graph.
k = get_k(test_graph)
print('Computing top-' + str(k) + ' central nodes')
top_central_nodes = gmetrics_lib.get_top_k_by_centrality(test_graph, k)

# Baseline entry: ratio of the largest component size to the node count of
# the unmodified graph.
# NOTE(review): the meaning of the first and third columns (0 and 1) is not
# visible here -- confirm against the code that consumes op_score.
op_score = []
init_nodes, init_edges, init_max_comp_sz = \
    gmetrics_lib.get_largest_component_size(test_graph)
# float() keeps the ratio fractional even if this runs under Python 2,
# where int / int truncates.
init_ratio = float(init_max_comp_sz) / init_nodes
op_score.append([0, init_ratio, 1])

curr_graph = test_graph
# The index (not the node) drives the loop because the progress message is
# 1-based.
for i in range(len(top_central_nodes)):
    print('Computing op-ratio for graph ' + str(i + 1) + ' of ' + str(k))
# Script section: build a feature table from a flow file named on the
# command line and write it out under a fixed output directory, reporting
# the elapsed wall-clock time.
#
# Earlier inputs / alternative pipelines, left commented out for reference.
#path = '/pic/projects/mnms4graphs/iscx/netflow/testbed-11jun-aggr.tsv'
#flow_path = 'graphs/plogon20120430.dmp.gr'
#transform_eventlog(flow_path)
#flow_path = 'graphs/testbed-13jun-aggr.txt'

# For processing UNB data
#flow_path = sys.argv[1]
#ip_mapper = transform_unb(flow_path)
#prefix = os.path.basename(flow_path)
#prefix = prefix.split('.')[0]
#outdir = '/pic/projects/mnms4graphs/iscx/flo_features'

# For processing simulated TSV files
# Fail with a usage message instead of an IndexError traceback when the
# input path is missing.
if len(sys.argv) < 2:
    print('USAGE:')
    print(' ' + sys.argv[0] + ' <flow file path>')
    sys.exit(1)

# NOTE(review): the variable name suggests the argument is given without its
# '.tsv' extension -- confirm against process_tsv.
path_without_tsv_ext = sys.argv[1]
flow_path = path_without_tsv_ext
ip_mapper = process_tsv(flow_path)
prefix = os.path.basename(flow_path)
outdir = '/pic/projects/mnms4graphs/visr/tsv_features'

# Output files share the prefix <outdir>/<input file name>.
gmetrics_lib.mkdir_p(outdir)
feature_data_prefix = os.path.join(outdir, prefix)
print('Computing features from : ' + flow_path)
print('Output prefix : ' + feature_data_prefix)

# Time the build + write steps together.
t1 = time.time()
feature_table = {}
# build_feature_table receives feature_table as an argument and it is then
# passed to write_feature_matrix; presumably it is filled in place.
build_feature_table(flow_path, ip_mapper, feature_table, False)
write_feature_matrix(feature_table, feature_data_prefix)
t2 = time.time()
print('Total time taken: ' + str(t2 - t1))
Ejemplo n.º 3
0
# Script section: read a graph file path from the command line, prepare a
# scratch directory next to it, ask gmetrics_lib for the top-k central nodes,
# record the initial largest-component ratio, then iterate over those nodes.
#
# Earlier hard-coded inputs, left commented out for reference.
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'evt_logs'
#test_graph = '/pic/projects/mnms4graphs/eventlogs/tsv/plogon20120429.dmp.gr.tsv'
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'nccdc'
#test_graph = '/pic/projects/mnms4graphs/nccdc/2013/tsv/win3600/nccdc2013_win3600_12.tsv'
#gmetrics_lib.get_graph_size(test_graph)

#sys.exit(1)

# The graph to analyse is the first command-line argument; fail with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    print('USAGE:')
    print(' ' + sys.argv[0] + ' <path to graph tsv file>')
    sys.exit(1)

test_graph = sys.argv[1]

# Dataset label: the file name with a trailing '.tsv' removed. Only the
# suffix is stripped, so a '.tsv' appearing earlier in the name is preserved.
data_src = os.path.basename(test_graph)
if data_src.endswith('.tsv'):
    data_src = data_src[:-len('.tsv')]

# Scratch directory beside the input graph. os.path.join keeps the result
# relative when the graph is given as a bare file name (empty dirname);
# plain concatenation would have produced '/tmp-graphs/' at the filesystem
# root. The trailing '' component keeps the trailing separator.
test_graph_dir = os.path.dirname(test_graph)
tmp_graphs_dir = os.path.join(test_graph_dir, 'tmp-graphs', '')
gmetrics_lib.mkdir_p(tmp_graphs_dir)
tmp_graph_prefix = tmp_graphs_dir + 'tmp_graph'

# get_k is defined elsewhere in the file; it chooses k for this graph.
k = get_k(test_graph)
print('Computing top-' + str(k) + ' central nodes')
top_central_nodes = gmetrics_lib.get_top_k_by_centrality(test_graph, k)

# Baseline entry: ratio of the largest component size to the node count of
# the unmodified graph.
# NOTE(review): the meaning of the first and third columns (0 and 1) is not
# visible here -- confirm against the code that consumes op_score.
op_score = []
init_nodes, init_edges, init_max_comp_sz = \
    gmetrics_lib.get_largest_component_size(test_graph)
# float() keeps the ratio fractional even if this runs under Python 2,
# where int / int truncates.
init_ratio = float(init_max_comp_sz) / init_nodes
op_score.append([0, init_ratio, 1])

curr_graph = test_graph
# The index (not the node) drives the loop because the progress message is
# 1-based.
for i in range(len(top_central_nodes)):
    print('Computing op-ratio for graph ' + str(i + 1) + ' of ' + str(k))
# Script section: build a feature table from a flow file named on the
# command line and write it out under a fixed output directory, reporting
# the elapsed wall-clock time.
#
# Earlier inputs / alternative pipelines, left commented out for reference.
#path = '/pic/projects/mnms4graphs/iscx/netflow/testbed-11jun-aggr.tsv'
#flow_path = 'graphs/plogon20120430.dmp.gr'
#transform_eventlog(flow_path)
#flow_path = 'graphs/testbed-13jun-aggr.txt'

# For processing UNB data
#flow_path = sys.argv[1]
#ip_mapper = transform_unb(flow_path)
#prefix = os.path.basename(flow_path)
#prefix = prefix.split('.')[0]
#outdir = '/pic/projects/mnms4graphs/iscx/flo_features'

# For processing simulated TSV files
# Fail with a usage message instead of an IndexError traceback when the
# input path is missing.
if len(sys.argv) < 2:
    print('USAGE:')
    print(' ' + sys.argv[0] + ' <flow file path>')
    sys.exit(1)

# NOTE(review): the variable name suggests the argument is given without its
# '.tsv' extension -- confirm against process_tsv.
path_without_tsv_ext = sys.argv[1]
flow_path = path_without_tsv_ext
ip_mapper = process_tsv(flow_path)
prefix = os.path.basename(flow_path)
outdir = '/pic/projects/mnms4graphs/visr/tsv_features'

# Output files share the prefix <outdir>/<input file name>.
gmetrics_lib.mkdir_p(outdir)
feature_data_prefix = os.path.join(outdir, prefix)
print('Computing features from : ' + flow_path)
print('Output prefix : ' + feature_data_prefix)

# Time the build + write steps together.
t1 = time.time()
feature_table = {}
# build_feature_table receives feature_table as an argument and it is then
# passed to write_feature_matrix; presumably it is filled in place.
build_feature_table(flow_path, ip_mapper, feature_table, False)
write_feature_matrix(feature_table, feature_data_prefix)
t2 = time.time()
print('Total time taken: ' + str(t2 - t1))
Ejemplo n.º 5
0
        f_out.write(u + str(' ') + v + '\n')
    f.close()
    f_out.close()

# Script section: for every .tsv file in a directory, run the external
# `kcore` tool with kmin == kmax == the requested k, saving the cores under
# <directory>/kcore_graphs/.
if len(sys.argv) < 3:
    print('USAGE:')
    print(' ' + sys.argv[0] + ' <directory with tsv files (FORMAT: "u v")> <k-core count, e.g. 500>')
    print(' A subdirectory named kcore_graphs will be created in the directory with tsv files')
    sys.exit(1)

# Imported here because the file's import section is outside this block.
import subprocess

tsv_dir = sys.argv[1]
# Listing happens before the output directory is created.
files = os.listdir(tsv_dir)
#kcores = ['10', '50', '100', '500']
kcores = [sys.argv[2]]
kcore_graph_dir = tsv_dir + '/kcore_graphs/'
gmetrics_lib.mkdir_p(kcore_graph_dir)

for f in files:
    # Match on the extension only; a substring test would also pick up
    # names such as 'graph.tsv.bak'.
    if f.endswith('.tsv'):
        #csv2tsv(f)
        #outpath = f.replace('csv', 'tsv')
        inpath = tsv_dir + '/' + f
        # Output prefix uses the part of the file name before the first dot.
        tokens = f.split('.')
        prefix = tokens[0]
        kcore_prefix = kcore_graph_dir + 'kcore_' + prefix
        for k in kcores:
            # Pass the arguments as a list so paths containing spaces or
            # shell metacharacters reach kcore intact (no shell involved).
            kcore_args = ['kcore', '--graph', inpath,
                          '--kmin', k, '--kmax', k,
                          '--savecores', kcore_prefix, '--format', 'tsv']
            # Same text the original command string had; kept under the
            # original name for any later use.
            cmd = ' '.join(kcore_args)
            try:
                status = subprocess.call(kcore_args)
            except OSError as err:
                # kcore missing or not executable: report and carry on, as
                # os.system would not have aborted the script either.
                print('WARNING: could not run "' + cmd + '": ' + str(err))
            else:
                if status != 0:
                    print('WARNING: kcore exited with status ' +
                          str(status) + ' for ' + inpath)
            graph_prefix = kcore_prefix + '.' + k
Ejemplo n.º 6
0
# Script section: for every .tsv file in a directory, run the external
# `kcore` tool with kmin == kmax == the requested k, saving the cores under
# <directory>/kcore_graphs/.
if len(sys.argv) < 3:
    print('USAGE:')
    print(
        ' ' + sys.argv[0] +
        ' <directory with tsv files (FORMAT: "u v")> <k-core count, e.g. 500>')
    print(
        ' A subdirectory named kcore_graphs will be created in the directory with tsv files'
    )
    sys.exit(1)

# Imported here because the file's import section is outside this block.
import subprocess

tsv_dir = sys.argv[1]
# Listing happens before the output directory is created.
files = os.listdir(tsv_dir)
#kcores = ['10', '50', '100', '500']
kcores = [sys.argv[2]]
kcore_graph_dir = tsv_dir + '/kcore_graphs/'
gmetrics_lib.mkdir_p(kcore_graph_dir)

for f in files:
    # Match on the extension only; a substring test would also pick up
    # names such as 'graph.tsv.bak'.
    if f.endswith('.tsv'):
        #csv2tsv(f)
        #outpath = f.replace('csv', 'tsv')
        inpath = tsv_dir + '/' + f
        # Output prefix uses the part of the file name before the first dot.
        tokens = f.split('.')
        prefix = tokens[0]
        kcore_prefix = kcore_graph_dir + 'kcore_' + prefix
        for k in kcores:
            # Pass the arguments as a list so paths containing spaces or
            # shell metacharacters reach kcore intact (no shell involved).
            kcore_args = [
                'kcore', '--graph', inpath,
                '--kmin', k, '--kmax', k,
                '--savecores', kcore_prefix, '--format', 'tsv',
            ]
            # Same text the original command string had; kept under the
            # original name for any later use.
            cmd = ' '.join(kcore_args)
            try:
                status = subprocess.call(kcore_args)
            except OSError as err:
                # kcore missing or not executable: report and carry on, as
                # os.system would not have aborted the script either.
                print('WARNING: could not run "' + cmd + '": ' + str(err))
            else:
                if status != 0:
                    print('WARNING: kcore exited with status ' +
                          str(status) + ' for ' + inpath)
            graph_prefix = kcore_prefix + '.' + k