def test_edit_distance():
    """Write edit-distance similarity scores for the random traces to a file.

    Options come from ``parse_options()``. For every index ``i`` in
    ``[options.offset, options.offset + options.batch)`` and every
    aggregation value in ``[0.02, 0.5, 1, 2, 4]`` three aggregate trees are
    built:

    * ``random.<i>.txt``
    * ``random.<i>.reverse.txt``
    * ``random.<upper_limit - i>.txt`` where
      ``upper_limit = options.batch + options.offset``

    and the first tree is compared against the other two with
    ``compare.similarity``.

    One line per ``i`` is written to ``similarity.edit.<offset>.dat``:
    the index, then the five "vs. reverse" scores, then the five
    "vs. upper_limit - i" scores, every cell followed by a tab.

    Side effects: ``options.aggregate`` and ``options.input`` are
    overwritten on every iteration.

    Raises:
        Exception: if the number of dimensions and types differ.
    """
    options, args = parse_options()
    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")
    # For csv:
    # "(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"

    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    # Map each dimension name to its declared type (later duplicates win).
    dict_dim = dict(zip(dim, types))

    def root_tree(path):
        # Point the shared options object at `path`, build the aggregate
        # tree and wrap its root for the comparison routine.
        options.input = path
        built = build_aggregate_tree(options, args, fields, dim, types, dict_dim)
        return T(built[0][0].get_root())

    upper_limit = options.batch + options.offset

    # `with` guarantees the file is flushed and closed even when a tree
    # build or a comparison raises part-way through the batch.
    with open("similarity.edit.%s.dat" % options.offset, "w") as fout:
        for i in xrange(options.offset, upper_limit):
            reverse_cells = []
            mirror_cells = []
            for alpha in [0.02, 0.5, 1, 2, 4]:
                options.aggregate = alpha
                t = root_tree('../test/app_ipv4/random.%s.txt' % i)
                t1 = root_tree('../test/app_ipv4/random.%s.reverse.txt' % i)
                # NOTE(review): this index runs from options.batch down to 1
                # regardless of the offset -- confirm that is intended.
                t3 = root_tree('../test/app_ipv4/random.%s.txt' % (upper_limit - i))

                # Single parenthesised string: prints identically under the
                # Python 2 print statement and the Python 3 print function.
                print("Edit Distance Based Similarity")
                # `g` is not defined in this function; presumably a
                # module-level object -- verify.
                s0 = compare.similarity(t, t1, g)
                s1 = compare.similarity(t, t3, g)
                reverse_cells.append("%s\t" % s0)
                mirror_cells.append("%s\t" % s1)

            fout.write("%s\t" % i + "".join(reverse_cells) + "".join(mirror_cells) + "\n")
def main():
    """Compare LRU aggregate trees of two capture days over a node budget sweep.

    Options come from ``parse_options()``. ``options.aggregate`` is fixed
    at 0.7. For each chunk index ``i`` in ``0..19`` and each node budget
    ``m`` in ``16, 32, ..., 512`` an LRU aggregate tree is built for

    * ``20100224/nfcapd.201002240220.chunk.<i>.txt`` and
    * ``20100130/nfcapd.201001301458.chunk.<i>.txt``

    and the two are compared with ``compare.similarity``.

    One line per chunk is written to ``similarity.LRU_sim.dat``: the chunk
    index followed by one score per node budget, every cell followed by a
    tab. Each ``(i, m, score)`` triple is also printed to stdout.

    Side effects: raises the interpreter recursion limit to 1000000 and
    overwrites ``options.aggregate``, ``options.max_nodes`` and
    ``options.input``.

    NOTE(review): a later definition named ``main`` exists in this file and
    rebinds the name if it is defined at the same level -- confirm which one
    is meant to run.

    Raises:
        Exception: if the number of dimensions and types differ.
    """
    sys.setrecursionlimit(1000000)
    options, args = parse_options()
    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")
    # For csv:
    # "(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"

    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    # Map each dimension name to its declared type (later duplicates win).
    dict_dim = dict(zip(dim, types))

    def root_tree(path):
        # Point the shared options object at `path`, build the LRU
        # aggregate tree and wrap its root for the comparison routine.
        options.input = path
        built = build_LRU_aggregate_tree(options, args, fields, dim, types, dict_dim)
        return T(built[0][0].get_root())

    max_nodes = [2 ** exponent for exponent in xrange(4, 10)]
    options.aggregate = 0.7

    # `with` guarantees the file is flushed and closed even when a tree
    # build or a comparison raises part-way through the sweep.
    with open("similarity.LRU_sim.dat", "w") as fout:
        for i in xrange(0, 20):
            cells = ["%s\t" % i]
            for m in max_nodes:
                options.max_nodes = m
                t = root_tree('../test/app_ipv4/20100224/nfcapd.201002240220.chunk.%s.txt' % i)
                t2 = root_tree('../test/app_ipv4/20100130/nfcapd.201001301458.chunk.%s.txt' % i)
                # `g` is not defined in this function; presumably a
                # module-level object -- verify.
                simil = compare.similarity(t, t2, g)
                cells.append("%s\t" % simil)
                # The original printed `alpa`, which is never bound in this
                # function (NameError); report the swept parameter instead.
                # Formatted as one string so Python 2 and 3 print the same.
                print("%s %s %s" % (i, m, simil))
            cells.append("\n")
            fout.write("".join(cells))
def main():
    """Build aggregate trees for a 20100224 trace and its reversed copy and compare them.

    Options come from ``parse_options()``. For the single aggregation value
    ``2`` the function builds one tree from ``20100224.<i>.txt`` and one
    from ``20100224.<i>.reverse.txt`` and passes both to
    ``compare.similarity``.

    Side effects: raises the interpreter recursion limit to 1000000,
    overwrites ``options.aggregate`` and ``options.input``, and creates
    (truncates) ``similarity.2010224.test.dat``.

    NOTE(review): another ``main`` is defined earlier in this file; if both
    live at the same level this definition replaces it.

    NOTE(review): the visible body never writes ``simil`` to ``fout`` and
    never closes ``fout`` -- the function may be cut off here or unfinished;
    confirm against the full file.

    Raises:
        Exception: if the number of dimensions and types differ.
    """
    sys.setrecursionlimit(1000000)
    options,args = parse_options()
    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")
    #test()
    #For csv
    #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"
    # Map each dimension name to its declared type.
    dict_dim = {}
    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    else:
        for i in range(len(dim)):
            dict_dim[dim[i]] = types[i]
    # Opened for writing, which truncates any existing file of that name.
    fout = open("similarity.2010224.test.dat","w")
    for alpa in [2]:
        options.aggregate = alpa
        # NOTE(review): `i` is not a loop variable here; it still holds the
        # last index of the dimension loop above (len(dim) - 1). Presumably a
        # chunk index was intended -- verify.
        options.input = '../test/app_ipv4/20100224/20100224.%s.txt'%i
        trees = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
        t = T(trees[0][0].get_root())
        options.input = '../test/app_ipv4/20100224/20100224.%s.reverse.txt'%i
        trees2 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
        t2 = T(trees2[0][0].get_root())
        # `g` is not defined in this function; presumably a module-level
        # object -- verify.
        simil = compare.similarity(t,t2,g)