# parse_options, build_aggregate_tree, maxSimilarity, T, compare and the node
# cost function g are assumed to be defined or imported earlier in this module.
def benchmark_similarity(alpha,k,options,args):
    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")
    #test()
    #For csv
    #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"
    dict_dim = {}
    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    else:
        for i in range(len(dim)):
            dict_dim[dim[i]] = types[i]
    # Assumed: alpha sets the aggregation threshold, mirroring how
    # test_edit_distance uses options.aggregate (alpha was otherwise unused).
    options.aggregate = alpha
    # Tree built from the current options.input, set by the caller (e.g. random.<k>.txt).
    trees = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
    # Tree built from the reversed trace for the same index k.
    options.input = '../test/app_ipv4/random.%s.reverse.txt'%k
    trees1 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
    print "Computing Similarity"
    # Similarity between the first and the last tree built from the forward trace ...
    s0 = maxSimilarity(trees[0][0],trees[-1][0])
    print "still computing"
    # ... and between the first trees of the forward and reversed traces.
    s1 = maxSimilarity(trees[0][0],trees1[0][0])
    print "done"
    return len(trees[0][0].preorder()),s0,s1
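# A minimal driver sketch for benchmark_similarity, assuming parse_options()
# exposes the same offset/batch options used by test_edit_distance below and
# that the ../test/app_ipv4/random.<k>.txt traces exist; the alpha value of 2
# and the output filename are illustrative only.
def run_similarity_benchmarks():
    options,args = parse_options()
    fout = open("similarity.max.test.dat","w")
    for k in xrange(options.offset, options.offset + options.batch):
        # benchmark_similarity reads the forward trace from options.input and
        # switches to the reversed trace itself.
        options.input = '../test/app_ipv4/random.%s.txt'%k
        size,s0,s1 = benchmark_similarity(2,k,options,args)
        fout.write("%s\t%s\t%s\t%s\n"%(k,size,s0,s1))
    fout.close()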
def test_edit_distance():
    options,args = parse_options()
    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")
    #test()
    #For csv
    #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"
    dict_dim = {}
    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    else:
        for i in range(len(dim)):
            dict_dim[dim[i]] = types[i]
    fout = open("similarity.edit.%s.dat"%options.offset,"w")
    upper_limit = options.batch + options.offset
    for i in xrange(0,options.batch):
        i = i + options.offset
        lrow = "%s\t"%i
        rrow = ""
        for alpha in [0.02, 0.5, 1, 2, 4]:
            options.aggregate = alpha
            # Tree for trace i ...
            options.input = '../test/app_ipv4/random.%s.txt'%i
            trees = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
            t = T(trees[0][0].get_root())
            # ... its reversed counterpart ...
            options.input = '../test/app_ipv4/random.%s.reverse.txt'%i
            trees1 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
            t1 = T(trees1[0][0].get_root())
            # ... and the trace mirrored around the end of the batch.
            options.input = '../test/app_ipv4/random.%s.txt'%(upper_limit-i)
            trees3 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
            t3 = T(trees3[0][0].get_root())
            print "Edit Distance Based Similarity"
            # Edit-distance-based similarity against the reversed and the mirrored trace.
            s0 = compare.similarity(t,t1,g)
            s1 = compare.similarity(t,t3,g)
            lrow += "%s\t"%s0
            rrow += "%s\t"%s1
        fout.write(lrow+rrow+"\n")
    fout.close()
def main():
    sys.setrecursionlimit(1000000)
    options,args = parse_options()
    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")
    #test()
    #For csv
    #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"
    dict_dim = {}
    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    else:
        for i in range(len(dim)):
            dict_dim[dim[i]] = types[i]
    fout = open("similarity.2010224.test.dat","w")
    for alpha in [2]:
        options.aggregate = alpha
        # The trace index i is assumed to follow the same offset/batch scheme
        # as test_edit_distance (it was left undefined here).
        for i in xrange(options.offset, options.offset + options.batch):
            # Tree for the forward 20100224 trace ...
            options.input = '../test/app_ipv4/20100224/20100224.%s.txt'%i
            trees = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
            t = T(trees[0][0].get_root())
            # ... and for its reversed counterpart.
            options.input = '../test/app_ipv4/20100224/20100224.%s.reverse.txt'%i
            trees2 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
            t2 = T(trees2[0][0].get_root())
            simil = compare.similarity(t,t2,g)
            # Record the similarity for this trace/alpha pair (assumed output format).
            fout.write("%s\t%s\t%s\n"%(i,alpha,simil))
    fout.close()
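# Assumed script entry point: run main() only when this module is executed
# directly rather than imported.
if __name__ == "__main__":
    main()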