Esempio n. 1
0
def test_edit_distance():
    """Write edit-distance similarity scores for a batch of random inputs.

    For every index ``i`` in ``[options.offset, options.offset + options.batch)``
    and for every aggregation value in ``(0.02, 0.5, 1, 2, 4)``, the tree
    built from ``random.<i>.txt`` is compared against

    * the tree built from ``random.<i>.reverse.txt``, and
    * the tree built from ``random.<upper_limit - i>.txt`` where
      ``upper_limit = options.batch + options.offset``.

    One row per index is written to ``similarity.edit.<offset>.dat``: the
    index, the five scores against the reverse file, then the five scores
    against the other file.  Every cell is followed by a tab.

    Side effects: ``options.aggregate`` and ``options.input`` are overwritten
    on every iteration, and a status line is printed per comparison pair.

    Raises:
        Exception: if the number of dimensions and types given on the command
            line differ.
    """
    options, args = parse_options()

    fields = options.fields.split(" ")
    dim = options.dim.split(" ")
    types = options.types.split(" ")

    #For csv
    #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"

    if len(dim) != len(types):
        raise Exception("The number of defined dimensions and types has to be equal")
    # Map each dimension name to the type declared at the same position.
    dict_dim = dict(zip(dim, types))

    def _tree_for(path):
        # Point the shared options object at `path`, build the aggregate
        # tree and wrap the root of the first tree in T.
        options.input = path
        built = build_aggregate_tree(options, args, fields, dim, types, dict_dim)
        return T(built[0][0].get_root())

    # The context manager guarantees the output file is flushed and closed
    # even when tree construction or comparison raises part-way through.
    with open("similarity.edit.%s.dat" % options.offset, "w") as fout:
        upper_limit = options.batch + options.offset
        for step in xrange(0, options.batch):
            i = step + options.offset
            reverse_scores = []
            other_scores = []
            for alpha in [0.02, 0.5, 1, 2, 4]:
                options.aggregate = alpha
                t = _tree_for('../test/app_ipv4/random.%s.txt' % i)
                t1 = _tree_for('../test/app_ipv4/random.%s.reverse.txt' % i)
                t3 = _tree_for('../test/app_ipv4/random.%s.txt' % (upper_limit - i))

                # Single-argument parenthesised form prints the same text on
                # Python 2 and Python 3.
                print("Edit Distance Based Similarity")
                # `g` is not bound in this function; it has to be provided
                # by the enclosing module.
                reverse_scores.append(compare.similarity(t, t1, g))
                other_scores.append(compare.similarity(t, t3, g))

            cells = [i] + reverse_scores + other_scores
            fout.write("".join("%s\t" % cell for cell in cells) + "\n")
Esempio n. 2
0
    def main():
        """Compare LRU aggregate trees of two traces for several node budgets.

        For each chunk index ``i`` in ``0..19`` and each node budget in
        ``16, 32, 64, 128, 256, 512`` the LRU aggregate trees built from the
        ``20100224`` chunk and the ``20100130`` chunk are compared with
        ``compare.similarity``.  One row per chunk index is written to
        ``similarity.LRU_sim.dat``: the index followed by one score per node
        budget, every cell followed by a tab.

        Side effects: raises the interpreter recursion limit, overwrites
        ``options.aggregate`` (fixed at 0.7), ``options.max_nodes`` and
        ``options.input``, and prints ``<index> <max_nodes> <score>`` for
        every comparison.

        Raises:
            Exception: if the number of dimensions and types given on the
                command line differ.
        """
        # NOTE(review): presumably needed because the tree code recurses
        # deeply; confirm before lowering.
        sys.setrecursionlimit(1000000)

        options, args = parse_options()

        fields = options.fields.split(" ")
        dim = options.dim.split(" ")
        types = options.types.split(" ")

        #For csv
        #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"

        if len(dim) != len(types):
            raise Exception("The number of defined dimensions and types has to be equal")
        # Map each dimension name to the type declared at the same position.
        dict_dim = dict(zip(dim, types))

        def _tree_for(path):
            # Point the shared options object at `path`, build the LRU
            # aggregate tree and wrap the root of the first tree in T.
            options.input = path
            built = build_LRU_aggregate_tree(options, args, fields, dim, types, dict_dim)
            return T(built[0][0].get_root())

        max_nodes = [2 ** exponent for exponent in xrange(4, 10)]
        options.aggregate = 0.7

        # The context manager guarantees the output file is flushed and
        # closed even when a comparison raises part-way through.
        with open("similarity.LRU_sim.dat", "w") as fout:
            for i in xrange(0, 20):
                cells = [i]
                for m in max_nodes:
                    options.max_nodes = m
                    t = _tree_for('../test/app_ipv4/20100224/nfcapd.201002240220.chunk.%s.txt' % i)
                    t2 = _tree_for('../test/app_ipv4/20100130/nfcapd.201001301458.chunk.%s.txt' % i)

                    # `g` is not bound in this function; it has to be
                    # provided by the enclosing module.
                    simil = compare.similarity(t, t2, g)
                    cells.append(simil)

                    # The original printed `alpa`, which is never bound in
                    # this function and raised NameError on the first
                    # iteration.  Report the parameter actually being swept.
                    print("%s %s %s" % (i, m, simil))
                fout.write("".join("%s\t" % cell for cell in cells) + "\n")
Esempio n. 3
0
 def main():
     """Compare the aggregate tree of a trace with that of its reverse file.

     Parses the command-line options, builds the dimension-to-type mapping,
     opens ``similarity.2010224.test.dat`` for writing and, for the single
     aggregation value ``2``, builds aggregate trees from
     ``20100224.<i>.txt`` and ``20100224.<i>.reverse.txt`` and computes
     ``compare.similarity`` between them.

     NOTE(review): in the visible code the score is never written and
     ``fout`` is never closed; the definition may continue beyond this
     excerpt -- confirm against the full file.

     Raises:
         Exception: if the number of dimensions and types given on the
             command line differ.
     """
     # NOTE(review): presumably needed because the tree code recurses
     # deeply; confirm before lowering.
     sys.setrecursionlimit(1000000)

     options,args = parse_options()

     # Each option is a single string with space-separated entries.
     fields = options.fields.split(" ")
     dim = options.dim.split(" ")
     types = options.types.split(" ")


     #test()

     #For csv
     #"(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),[^,]*,[^,]*,([^,]*),([^,]*),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,([^,]*)"

     # Maps each dimension name to the type declared at the same position.
     dict_dim = {}

     if len(dim) != len(types):
         raise Exception("The number of defined dimensions and types has to be equal")
     else:
         for i in range(len(dim)):
             dict_dim[dim[i]] = types[i]
     fout = open("similarity.2010224.test.dat","w")
     for alpa in [2]:
         options.aggregate = alpa

         # NOTE(review): `i` is not assigned inside this loop.  The only
         # visible binding is the index of the dimension/type loop above,
         # which would leave it at len(dim) - 1 here -- confirm that this is
         # the intended file index.
         options.input = '../test/app_ipv4/20100224/20100224.%s.txt'%i
         trees = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
         # Wrap the root of the first tree for comparison.
         t = T(trees[0][0].get_root()) 


         options.input = '../test/app_ipv4/20100224/20100224.%s.reverse.txt'%i
         trees2 = build_aggregate_tree(options,args,fields,dim,types,dict_dim)
         t2 = T(trees2[0][0].get_root())

         # `g` is not bound in this function; it has to be provided by the
         # enclosing module.
         simil = compare.similarity(t,t2,g)