Esempio n. 1
0
def benchmark_similarity(alpha,k,options,args):
    """Build two sets of aggregate trees and compare them with maxSimilarity.

    The first set is built from whatever ``options.input`` holds on entry.
    ``options.input`` is then overwritten with the ``random.<k>.reverse.txt``
    path and a second set is built; the caller's ``options`` object keeps
    that new value after the call.

    NOTE(review): ``alpha`` is accepted but never read in this function --
    confirm whether the caller is expected to set ``options.aggregate``.

    Parameters:
        alpha   -- unused here (see note above).
        k       -- value substituted into the reverse input file name.
        options -- object exposing space-separated ``fields``, ``dim`` and
                   ``types`` strings and a writable ``input`` attribute.
        args    -- passed through unchanged to build_aggregate_tree.

    Returns:
        A 3-tuple ``(n, s0, s1)`` where ``n`` is the length of
        ``preorder()`` on the first tree of the first build, ``s0`` is the
        similarity between the first and the last tree of the first build,
        and ``s1`` is the similarity between the first trees of both builds.

    Raises:
        Exception -- if ``options.dim`` and ``options.types`` do not contain
                     the same number of entries.
    """
    field_names = options.fields.split(" ")
    dim_names = options.dim.split(" ")
    type_names = options.types.split(" ")

    # Guard clause: each dimension needs exactly one type.
    if len(dim_names) != len(type_names):
        raise Exception("The number of defined dimensions and types has to be equal")
    type_by_dim = dict(zip(dim_names, type_names))

    # First build uses the input already configured on options.
    forward_trees = build_aggregate_tree(
        options, args, field_names, dim_names, type_names, type_by_dim)

    # Second build uses the reversed sample for the same k.
    options.input = '../test/app_ipv4/random.%s.reverse.txt' % k
    reverse_trees = build_aggregate_tree(
        options, args, field_names, dim_names, type_names, type_by_dim)

    # Parenthesised single-argument print is valid and identical in Python 2.
    print("Computing Similarity")
    first_vs_last = maxSimilarity(forward_trees[0][0], forward_trees[-1][0])
    print("still computing")
    first_vs_reverse = maxSimilarity(forward_trees[0][0], reverse_trees[0][0])
    print("done")

    node_count = len(forward_trees[0][0].preorder())
    return node_count, first_vs_last, first_vs_reverse
Esempio n. 2
0
def test_edit_distance():
    """Write edit-distance similarity scores for a batch of sample files.

    Options come from parse_options(). For every index ``i`` in
    ``[options.offset, options.offset + options.batch)`` and every aggregate
    value in ``[0.02, 0.5, 1, 2, 4]`` three trees are built:

      * ``random.<i>.txt``
      * ``random.<i>.reverse.txt``
      * ``random.<upper_limit - i>.txt`` with
        ``upper_limit = options.batch + options.offset``

    and two similarities are computed with ``compare.similarity``: the
    first tree against the reverse one (s0) and the first tree against the
    third one (s1).

    One line per ``i`` is written to ``similarity.edit.<offset>.dat``:
    the index, then all s0 values, then all s1 values, each followed by a
    tab, terminated by a newline.

    Raises:
        Exception -- if ``options.dim`` and ``options.types`` do not contain
                     the same number of entries.
    """
    options, args = parse_options()

    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")

    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    dict_dim = dict(zip(dim, types))

    def load_tree(path):
        # Point options.input at the given path, build the trees and wrap
        # the root of the first one, exactly as the three inline copies of
        # this sequence used to do.
        options.input = path
        trees = build_aggregate_tree(options, args, fields, dim, types, dict_dim)
        return T(trees[0][0].get_root())

    upper_limit = options.batch + options.offset
    fout = open("similarity.edit.%s.dat" % options.offset, "w")
    # try/finally guarantees the file is flushed and closed even when tree
    # building or the similarity computation raises part-way through.
    try:
        for step in xrange(0, options.batch):
            i = step + options.offset
            left_cells = ["%s\t" % i]
            right_cells = []
            for alpha in [0.02, 0.5, 1, 2, 4]:
                options.aggregate = alpha
                t = load_tree('../test/app_ipv4/random.%s.txt' % i)
                t1 = load_tree('../test/app_ipv4/random.%s.reverse.txt' % i)
                t3 = load_tree('../test/app_ipv4/random.%s.txt' % (upper_limit - i))

                print("Edit Distance Based Similarity")
                # NOTE(review): g is not defined in this function; it is
                # expected to be available as a module-level name.
                s0 = compare.similarity(t, t1, g)
                s1 = compare.similarity(t, t3, g)
                left_cells.append("%s\t" % s0)
                right_cells.append("%s\t" % s1)
            fout.write("".join(left_cells + right_cells) + "\n")
    finally:
        fout.close()
Esempio n. 3
0
 def main():
     sys.setrecursionlimit(1000000)
     
     options,args = parse_options()
                 
     fields = options.fields.split(" ")
     dim = options.dim.split(" ")
     types = options.types.split(" ")
     
     
     #test()
     
     #For csv
     #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"
  
     dict_dim = {}
     
     if len(dim) != len(types):
         raise Exception("The number of defined dimensions and types has to be equal")
     else:
         for i in range(len(dim)):
             dict_dim[dim[i]] = types[i]
     fout = open("similarity.2010224.test.dat","w")
     for alpa in [2]:
         options.aggregate = alpa
     
         options.input = '../test/app_ipv4/20100224/20100224.%s.txt'%i
         trees = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
         t = T(trees[0][0].get_root()) 
         
         
         options.input = '../test/app_ipv4/20100224/20100224.%s.reverse.txt'%i
         trees2 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
         t2 = T(trees2[0][0].get_root())
         
         simil = compare.similarity(t,t2,g)