# Driver script: takes a graph TSV path from the command line, asks
# gmetrics_lib for the top-k central nodes, records the initial
# largest-component ratio, then starts the per-node "op-ratio" loop.

# Earlier hard-coded test inputs, kept for reference.
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'evt_logs'
#test_graph = '/pic/projects/mnms4graphs/eventlogs/tsv/plogon20120429.dmp.gr.tsv'
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'nccdc'
#test_graph = '/pic/projects/mnms4graphs/nccdc/2013/tsv/win3600/nccdc2013_win3600_12.tsv'
#gmetrics_lib.get_graph_size(test_graph)
#sys.exit(1)

test_graph = sys.argv[1]
# Data-source label: the file name with every '.tsv' occurrence removed.
data_src = os.path.basename(test_graph).replace('.tsv', '')

# os.path.dirname() returns '' when the graph is given as a bare file name.
# Plain string concatenation would then yield '/tmp-graphs/' at the
# filesystem root; os.path.join keeps the scratch directory relative in that
# case. The empty last component preserves the trailing separator that
# tmp_graph_prefix (and any later concatenation) relies on.
test_graph_dir = os.path.dirname(test_graph)
tmp_graphs_dir = os.path.join(test_graph_dir, 'tmp-graphs', '')
gmetrics_lib.mkdir_p(tmp_graphs_dir)
tmp_graph_prefix = tmp_graphs_dir + 'tmp_graph'

k = get_k(test_graph)
print('Computing top-' + str(k) + ' central nodes')
top_central_nodes = gmetrics_lib.get_top_k_by_centrality(test_graph, k)

# Each op_score row is a 3-element list; the first row holds 0, the initial
# ratio and 1.
# NOTE(review): the three unpacked values are presumably node count, edge
# count and size of the largest connected component -- confirm against
# gmetrics_lib.get_largest_component_size.
op_score = []
[init_nodes, init_edges, init_max_comp_sz] = \
    gmetrics_lib.get_largest_component_size(test_graph)
init_ratio = init_max_comp_sz / init_nodes
op_score.append([0, init_ratio, 1])

curr_graph = test_graph
for i in range(len(top_central_nodes)):
    print('Computing op-ratio for graph ' + str(i + 1) + ' of ' + str(k))
# Driver script: builds a feature table from the flow file named on the
# command line and writes the feature matrix under a fixed output directory,
# reporting the elapsed wall-clock time.

# Earlier hard-coded inputs, kept for reference.
#path = '/pic/projects/mnms4graphs/iscx/netflow/testbed-11jun-aggr.tsv'
#flow_path = 'graphs/plogon20120430.dmp.gr'
#transform_eventlog(flow_path)
#flow_path = 'graphs/testbed-13jun-aggr.txt'

# For processing UNB data
#flow_path = sys.argv[1]
#ip_mapper = transform_unb(flow_path)
#prefix = os.path.basename(flow_path)
#prefix = prefix.split('.')[0]
#outdir = '/pic/projects/mnms4graphs/iscx/flo_features'

# For processing simulated TSV files
flow_path = path_without_tsv_ext = sys.argv[1]

# Pure path computations first; the calls with side effects follow in the
# same relative order as before (process_tsv, then mkdir_p).
prefix = os.path.basename(flow_path)
outdir = '/pic/projects/mnms4graphs/visr/tsv_features'
feature_data_prefix = '/'.join((outdir, prefix))

ip_mapper = process_tsv(flow_path)
gmetrics_lib.mkdir_p(outdir)

print('Computing features from : ' + flow_path)
print('Output prefix : ' + feature_data_prefix)

t1 = time.time()
feature_table = {}
# build_feature_table receives feature_table and the result is then read from
# that same dict by write_feature_matrix.
build_feature_table(flow_path, ip_mapper, feature_table, False)
write_feature_matrix(feature_table, feature_data_prefix)
t2 = time.time()
print('Total time taken: ' + str(t2 - t1))
# Driver script: takes a graph TSV path from the command line, asks
# gmetrics_lib for the top-k central nodes, records the initial
# largest-component ratio, then starts the per-node "op-ratio" loop.

# Earlier hard-coded test inputs, kept for reference.
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'evt_logs'
#test_graph = '/pic/projects/mnms4graphs/eventlogs/tsv/plogon20120429.dmp.gr.tsv'
#gmetrics_lib.get_graph_size(test_graph)
#data_src = 'nccdc'
#test_graph = '/pic/projects/mnms4graphs/nccdc/2013/tsv/win3600/nccdc2013_win3600_12.tsv'
#gmetrics_lib.get_graph_size(test_graph)
#sys.exit(1)

test_graph = sys.argv[1]
# Data-source label: the file name with every '.tsv' occurrence removed.
data_src = os.path.basename(test_graph).replace('.tsv', '')

# os.path.dirname() returns '' when the graph is given as a bare file name.
# Plain string concatenation would then yield '/tmp-graphs/' at the
# filesystem root; os.path.join keeps the scratch directory relative in that
# case. The empty last component preserves the trailing separator that
# tmp_graph_prefix (and any later concatenation) relies on.
test_graph_dir = os.path.dirname(test_graph)
tmp_graphs_dir = os.path.join(test_graph_dir, 'tmp-graphs', '')
gmetrics_lib.mkdir_p(tmp_graphs_dir)
tmp_graph_prefix = tmp_graphs_dir + 'tmp_graph'

k = get_k(test_graph)
print('Computing top-' + str(k) + ' central nodes')
top_central_nodes = gmetrics_lib.get_top_k_by_centrality(test_graph, k)

# Each op_score row is a 3-element list; the first row holds 0, the initial
# ratio and 1.
# NOTE(review): the three unpacked values are presumably node count, edge
# count and size of the largest connected component -- confirm against
# gmetrics_lib.get_largest_component_size.
op_score = []
[init_nodes, init_edges, init_max_comp_sz] = \
    gmetrics_lib.get_largest_component_size(test_graph)
init_ratio = init_max_comp_sz / init_nodes
op_score.append([0, init_ratio, 1])

curr_graph = test_graph
for i in range(len(top_central_nodes)):
    print('Computing op-ratio for graph ' + str(i + 1) + ' of ' + str(k))
# Driver script: builds a feature table from the flow file named on the
# command line and writes the feature matrix under a fixed output directory,
# reporting the elapsed wall-clock time.

# Earlier hard-coded inputs, kept for reference.
#path = '/pic/projects/mnms4graphs/iscx/netflow/testbed-11jun-aggr.tsv'
#flow_path = 'graphs/plogon20120430.dmp.gr'
#transform_eventlog(flow_path)
#flow_path = 'graphs/testbed-13jun-aggr.txt'

# For processing UNB data
#flow_path = sys.argv[1]
#ip_mapper = transform_unb(flow_path)
#prefix = os.path.basename(flow_path)
#prefix = prefix.split('.')[0]
#outdir = '/pic/projects/mnms4graphs/iscx/flo_features'

# For processing simulated TSV files
flow_path = path_without_tsv_ext = sys.argv[1]

# Pure path computations first; the calls with side effects follow in the
# same relative order as before (process_tsv, then mkdir_p).
prefix = os.path.basename(flow_path)
outdir = '/pic/projects/mnms4graphs/visr/tsv_features'
feature_data_prefix = '/'.join((outdir, prefix))

ip_mapper = process_tsv(flow_path)
gmetrics_lib.mkdir_p(outdir)

print('Computing features from : ' + flow_path)
print('Output prefix : ' + feature_data_prefix)

t1 = time.time()
feature_table = {}
# build_feature_table receives feature_table and the result is then read from
# that same dict by write_feature_matrix.
build_feature_table(flow_path, ip_mapper, feature_table, False)
write_feature_matrix(feature_table, feature_data_prefix)
t2 = time.time()
print('Total time taken: ' + str(t2 - t1))
# NOTE(review): this physical line fuses two things whose original line
# breaks and indentation have been lost:
#   1. the tail of an earlier definition (the f_out.write / f.close /
#      f_out.close statements) whose header is not visible here, and
#   2. the k-core driver script: it requires two command-line arguments
#      (a directory of tsv files and a k value), creates a 'kcore_graphs'
#      subdirectory via gmetrics_lib.mkdir_p, and for every directory entry
#      whose name contains '.tsv' runs the external `kcore` command through
#      os.system with --kmin and --kmax both set to that k value.
# The code is left byte-identical because the nesting of part 1 cannot be
# reconstructed from this view -- TODO restore the original line structure.
f_out.write(u + str(' ') + v + '\n') f.close() f_out.close() if len(sys.argv) < 3: print('USAGE:') print(' ' + sys.argv[0] + ' <directory with tsv files (FORMAT: "u v")> <k-core count, e.g. 500>') print(' A subdirectory named kcore_graphs will be created in the directory with tsv files') sys.exit(1) tsv_dir = sys.argv[1] files = os.listdir(tsv_dir) #kcores = ['10', '50', '100', '500'] kcores = [sys.argv[2]] kcore_graph_dir = tsv_dir + '/kcore_graphs/' gmetrics_lib.mkdir_p(kcore_graph_dir) for f in files: if f.find('.tsv') != -1: #csv2tsv(f) #outpath = f.replace('csv', 'tsv') inpath = tsv_dir + '/' + f tokens = f.split('.') prefix = tokens[0] kcore_prefix = kcore_graph_dir + 'kcore_' + prefix for k in kcores: cmd = 'kcore --graph ' + inpath + \ ' --kmin ' + k + ' --kmax ' + k + \ ' --savecores ' + kcore_prefix + ' --format tsv' os.system(cmd) graph_prefix = kcore_prefix + '.' + k
# Driver script: for every entry of the given directory whose name contains
# '.tsv', run the external `kcore` command with kmin == kmax == the requested
# k and save the result under <directory>/kcore_graphs/.
import subprocess  # local import: the command is run without a shell (see below)

if len(sys.argv) < 3:
    print('USAGE:')
    print(' ' + sys.argv[0] + ' <directory with tsv files (FORMAT: "u v")> <k-core count, e.g. 500>')
    print(' A subdirectory named kcore_graphs will be created in the directory with tsv files')
    sys.exit(1)

tsv_dir = sys.argv[1]
files = os.listdir(tsv_dir)
#kcores = ['10', '50', '100', '500']
kcores = [sys.argv[2]]  # kept as strings: they are passed straight to the command line
kcore_graph_dir = tsv_dir + '/kcore_graphs/'
gmetrics_lib.mkdir_p(kcore_graph_dir)

for f in files:
    # Substring match, as before: any entry containing '.tsv' is processed.
    if '.tsv' in f:
        #csv2tsv(f)
        #outpath = f.replace('csv', 'tsv')
        inpath = tsv_dir + '/' + f
        tokens = f.split('.')
        prefix = tokens[0]  # file name up to its first '.'
        kcore_prefix = kcore_graph_dir + 'kcore_' + prefix
        for k in kcores:
            # Pass the arguments as a list instead of a concatenated shell
            # string: paths or k values containing spaces or shell
            # metacharacters are then handed to kcore verbatim instead of
            # being split or interpreted by the shell.
            cmd = [
                'kcore',
                '--graph', inpath,
                '--kmin', k,
                '--kmax', k,
                '--savecores', kcore_prefix,
                '--format', 'tsv',
            ]
            try:
                # The exit status is ignored, as it was with os.system.
                subprocess.call(cmd)
            except OSError as err:
                # os.system never raised when the program could not be
                # started; report the problem and keep going likewise.
                sys.stderr.write('could not run kcore: ' + str(err) + '\n')
            graph_prefix = kcore_prefix + '.' + k