Example no. 1
def postOrder(maxprio, cwe, base, type, child, dir, filename):
    """Write the prioritized CWE weakness list (with parent-child relations) to the post-order report files."""
    dirfname=filename.split('.')[0]
    # maxi0..maxi10/maxi and medg0..medg10/medg are module-level dicts defined elsewhere in this module
    dmaxi={0:dict(maxi0), 1:dict(maxi1), 2:dict(maxi2), 3:dict(maxi3), 4:dict(maxi4), 5:dict(maxi5), 6:dict(maxi6), 7:dict(maxi7), 8:dict(maxi8), 9:dict(maxi9), 10:dict(maxi10), 11:dict(maxi)}
    dmedg={0:dict(medg0), 1:dict(medg1), 2:dict(medg2), 3:dict(medg3), 4:dict(medg4), 5:dict(medg5), 6:dict(medg6), 7:dict(medg7), 8:dict(medg8), 9:dict(medg9), 10:dict(medg10), 11:dict(medg)}

    #Third-quartile rank position
    Q3=int(round((len(maxprio)+1)/4.0))
             
    if base==11:
        fprior=open("workspace/"+dir+"/"+dir+"finalCWElist_"+type+".txt","w")            

    for i in range(12):        
        if (i==base) and (i in names):            
            #utils.makepath("workspace/"+dir)
            #utils.makepath("workspace/"+dir+"/"+dirfname)
            utils.makepath("workspace/"+dir+"/"+dirfname+"/post")
            utils.makepath("workspace/"+dir+"/"+dirfname+"/post/"+names[i])

            fprior=open("workspace/"+dir+"/"+dirfname+"/post/"+names[i]+"/"+"finalCWE-"+names[i]+"_"+type+".txt","w")
            
    cont=0
    #Find the priority value at the third-quartile rank
    for key, value in sorted(maxprio.items(), key=lambda kv: (kv[1], kv[0]), reverse=True):
        cont+=1
        if cont==Q3:
            Q3val=value
            break
    
    #Reset the quartile counter and sort the weaknesses by priority
    if type=="all":
        cont=0
        for key, value in sorted(maxprio.items(), key=lambda kv: (kv[1], kv[0]), reverse=True):
            x=cwe[key]
            x=x.rstrip('\n')
            cont+=1
            fprior.write(str(cont)+"., "+x+": "+",Sp=, "+str(value)+", Max=, "+str(dmaxi[base][key])+"\n")
            #fprior.write(str(cont)+". "+x+": "+",Sp=, "+str(value)+", Max=, "+str(dmaxi[base][key])+", Meg= "+str(dmedg[base][key])+"; Q3= "+str(Q3val)+")\n")
            #Check whether id is a parent weakness, i.e. whether it appears in l[1]
            id=key.split()[0].split("-")[1]
            for l in child.items():
                if id in l[1].split():
                    lid=l[0].split()[0].split("-")[1]
                    y=cwe[l[0]]
                    y=y.rstrip('\n')
                    fprior.write("  |\n")
                    if (id in cwebase[base]) and (lid in cwebase[base]):
                        fprior.write("  -->"+y+": "+"(Sp= "+str(maxprio[l[0]])+"; Max= "+str(dmaxi[base][l[0]])+"; Meg= "+str(dmedg[base][l[0]])+")\n")
                        fprior.write("\n")
        fprior.close()
    else:
        cont=0
        for key, value in sorted(maxprio.items(), key=lambda kv: (kv[1], kv[0]), reverse=True):
            x=cwe[key]             
            x=x.rstrip('\n')                            
            cont+=1                    
            fprior.write(str(cont)+"., "+x+":,"+str(value)+"\n")            
        fprior.close()

    return 0
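
The ranking above hinges on one idiom: sort the priority dict by value in descending order and take the value at rank (n+1)/4 as the third-quartile cutoff. A minimal, self-contained sketch of just that step, using a made-up maxprio dict for illustration:

# Minimal sketch of the quartile cutoff used in postOrder; the maxprio
# contents below are hypothetical, for illustration only.
maxprio = {"CWE-79 XSS": 9.1, "CWE-89 SQLi": 8.7, "CWE-20 Input": 6.2,
           "CWE-200 Exposure": 4.0, "CWE-310 Crypto": 2.5}

Q3 = int(round((len(maxprio) + 1) / 4.0))      # rank position of the 3rd quartile

ranked = sorted(maxprio.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
Q3val = ranked[Q3 - 1][1]                      # priority value at that rank

for rank, (key, value) in enumerate(ranked, start=1):
    marker = "  <-- Q3 cutoff" if rank == Q3 else ""
    print("%d. %s: Sp=%s%s" % (rank, key, value, marker))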
Example no. 2
def gnuplotpost(names,name,cweid,dir,filename):
    """Create a .post gnuplot file for one CWE and return the open file handle."""
    dirfname=filename.split('.')[0]
    utils.makepath("workspace/"+dir+"/"+dirfname+"/post")
    utils.makepath("workspace/"+dir+"/"+dirfname+"/post/"+names)
    post=open("workspace/"+dir+"/"+dirfname+"/post/"+names+"/"+cweid+".post","w")                
    #Write the gnuplot file headers so the results can be viewed graphically
    post.write("#"+name+"\n")
    post.write("Attributes"+" "+"Value\n")                                      
    return post      
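
Note that gnuplotpost returns the still-open file handle: the caller is expected to append the data rows and close it. A hypothetical call (all argument values below are invented for illustration):

# Hypothetical usage; "client", "CWE-79", "myproject" and "auth.xml" are
# illustrative values only.
post = gnuplotpost("client", "Cross-site Scripting", "CWE-79", "myproject", "auth.xml")
post.write("Confidentiality 3\n")   # one "Attribute Value" row per line
post.close()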
Example no. 3
from xml.dom import minidom   # import needed by this function; utils is a project-local helper module

def av2data(dir,filename):
    ## Load the attack-vector XML
    ## A complete list of AVs should be passed here (iterate over each AV.xml)

    print("Attack vector:", filename, "is being loaded!!!")
        
    avxml='workspace/'+dir+'/xml/'+filename
    dirfname=filename.split('.')[0]
    xmlDoc = minidom.parse(avxml)
    rootNode = xmlDoc.firstChild
    avcomp=rootNode.getElementsByTagName('Component')
    match={}
    matchaux={}
    #childof={}
    gnuplot="Attributes"
    
    cont=0      
    for i in avcomp:                
        #Walk the XML data for each component (i) of the attack vector (avcomp)
        gnuplot+=" "+i.attributes.get('name').value.capitalize()       
        cwe=i.getElementsByTagName('CWE')                                            
        cont+=1        
        for j in cwe:                                                             
            #Walk each weakness (j) of the component (cwe)
            cweid=j.attributes.get('id').value 
            id=cweid.split("-")[1]            
            cwename=j.attributes.get('name').value            
            cwename=cwename.lstrip()            
            cweatt=j.getElementsByTagName('Attribute')
            cwe=cweid+" "+cwename            
            child=j.attributes.get('childof').value
            utils.makepath("workspace/"+dir)
            utils.makepath("workspace/"+dir+"/"+dirfname)
            utils.makepath("workspace/"+dir+"/"+dirfname+"/data")
         
            try:
                data=open("workspace/"+dir+"/"+dirfname+"/data/"+cweid+".data","w")
                data.write("#"+cwe+"\n")
                data.write(gnuplot+"\n")
                data.close()
            except IOError as e:
                print("I/O error({0}): {1}".format(e.errno, e.strerror))

            att=[]
            #Build a dictionary mapping each weakness to its attributes and values
            if cwe in match:                
                for k in cweatt[0].attributes.keys():
                    att.append(k)                                    
                    try:
                        aux=cweatt[0].attributes.get(k).value
                    except Exception:
                        aux=0
                    match[cwe][k]+=[aux]
            else:                                
                match[cwe]={}          
                matchaux[cwe]={}                                                                               
                for k in cweatt[0].attributes.keys():                    
                    att.append(k)                    
                    try:
                        aux=cweatt[0].attributes.get(k).value
                    except Exception:
                        aux=0
                    match[cwe][k]=[aux]                                
                #Build an auxiliary match holding each weakness's children
                matchaux[cwe]=child                                
                    
    return match, matchaux    
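
A hypothetical invocation, assuming workspace/<dir>/xml/ already contains the attack-vector file (the directory and file names below are invented for illustration):

# Hypothetical usage of av2data; "myproject" and "auth.xml" are placeholders.
match, matchaux = av2data("myproject", "auth.xml")
for weakness, attrs in match.items():
    print(weakness, "->", attrs)            # attribute name -> list of values
    print("  childof:", matchaux[weakness]) # parent ids for this weakness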
Example no. 4
# Imports needed by this example; makepath, replace_, make_directed_edges,
# check_cyclic_edges, income_level_dict and makeplot are project-local helpers.
import argparse
import os

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.preprocessing import StandardScaler
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--raw_data_dir",
                        default='../data',
                        type=str,
                        required=False)
    parser.add_argument("--output_dir",
                        default='../data/processed',
                        type=str,
                        required=False)
    parser.add_argument("--makeplot",
                        type=lambda s: str(s).lower() in ("1", "true", "yes"),
                        default=True,
                        help="Plot graph (argparse's type=bool would treat any non-empty string as True)")

    args = parser.parse_args()
    input_dir = args.raw_data_dir
    comtradeurl = os.path.join(input_dir, "comtrade_data")
    makepath(args.output_dir)
    print("Processing data...")
    replace_dict = np.load(
        input_dir + '/countries_rename.npy',
        allow_pickle=True).item()  # Get dict items from npy file

    frames = []
    for name in os.listdir(comtradeurl):
        a = pd.read_csv(os.path.join(comtradeurl, name))
        a = a[['Trade Flow', 'Reporter', 'Partner', 'Trade Value (US$)']]
        frames.append(a)

    trade = pd.concat(frames, ignore_index=True)
    trade = trade.dropna()

    HCI_data = pd.read_csv(os.path.join(input_dir, 'HCIcountry.csv'))
    c_income_group = HCI_data[['Short Name', 'Income Group']]
    c_income_group = c_income_group.rename(columns={'Short Name': 'country'})
    inc_levels = sorted(set(c_income_group['Income Group']))  # sorted for a stable label order

    inc_levels_dict = {i: j for j, i in enumerate(inc_levels)}

    countries_attributes = pd.read_csv(
        os.path.join(input_dir, "country_profile_variables2017.csv"))
    countries_attributes = countries_attributes.replace(
        ['~0', '~0.0', '-~0.0', '...'], 0)
    countries_attributes = countries_attributes.apply(
        lambda x: pd.to_numeric(x, errors='ignore'))

    # Create feature dictionary for easy selection
    feature_indices_dict = {
        i: j
        for i, j in enumerate(list(countries_attributes.columns))
    }

    # Keep only the numeric columns from the third column onward
    countries_attributes.iloc[:, 2:] = countries_attributes.iloc[:, 2:].select_dtypes(exclude='object')
    countries_attributes = countries_attributes.dropna(axis='columns')
    countries_attributes = countries_attributes.drop(['Region'], axis=1)

    cols = countries_attributes.columns[1:]
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(countries_attributes.iloc[:, 1:])
    scaled_data = pd.DataFrame(scaled_data, columns=cols)
    countries_attributes.iloc[:, 1:] = scaled_data

    #----------------------------------------------------------------------------------
    countries_distances = pd.read_csv(
        os.path.join(input_dir, "countries_distances.csv"))
    countries_distances = countries_distances.rename(columns={
        'pays1': 'country1',
        'pays2': 'country2'
    })
    countries_distances = countries_distances[['country1', 'country2', 'dist']]
    countries_names = list(countries_distances['country1'])

    #-----------------------------------------------------------------------------------
    dat1 = list(countries_attributes['country'])
    dat2 = list(c_income_group['country'])
    dat3 = list(set(countries_distances['country1']))
    dat3_1 = list(countries_distances['country1'])
    dat3_2 = list(countries_distances['country2'])

    dat1 = replace_(dat1, replace_dict)
    dat2 = replace_(dat2, replace_dict)
    dat3 = replace_(dat3, replace_dict)
    dat3_1 = replace_(dat3_1, replace_dict)

    countries_attributes['country'] = dat1
    c_income_group['country'] = dat2
    countries_distances['country1'] = dat3_1
    countries_distances['country2'] = dat3_2
    countries_attributes = countries_attributes.drop_duplicates(
        subset='country', inplace=False)

    #----------------------------------------------------------------------------------------
    # [print(i) for i in c_income_group['country']]
    common_countries = []  # Countries found in all three lists of countries
    c1_nc23 = []  # countries found in c1 but not in c2 and c3
    ncm123 = []  # countries from dat1 that are missing from dat2 or dat3
    c2_nc13 = []  # countries found in c2 but not in c1 and c3
    c3_nc12 = []  # countries found in c3 but not in c1 and c2
    for c in dat1:
        if c in dat2 and c in dat3:
            common_countries.append(c)
        else:
            ncm123.append(c)

    for c in dat2:
        if c in dat1 and c in dat3:
            pass
        else:
            c2_nc13.append(c)

    for c in dat3:
        if c in dat1 and c in dat2:
            pass
        else:
            c3_nc12.append(c)

    print(len(common_countries))

    #-----------------------------------------------------------------------------------------

    ## Make a dictionary of countries and their given codes as keys for easy reference

    country_dict = {j: i for i, j in enumerate(sorted(set(common_countries)))}
    #country_dict

    #----------------------------------------------------------------------------------------
    # Select countries with names or data appearing in each of the datasets

    countries_attributes = countries_attributes[
        countries_attributes['country'].isin(common_countries)].reset_index(
            drop=True)
    c_income_group = c_income_group[c_income_group['country'].isin(
        common_countries)]
    countries_dists = countries_distances[countries_distances['country1'].isin(
        common_countries)]
    countries_dists = countries_dists[countries_dists['country2'].isin(
        common_countries)]
    #--------------------------------------------------------------------------

    cdist = countries_dists.copy()
    edge_list = []
    for i in range(len(cdist)):
        c = (country_dict[str(cdist.iloc[i, 0])],
             country_dict[str(cdist.iloc[i, 1])], round(cdist.iloc[i, 2], 2))

        edge_list.append(c)
    edge_list = sorted(edge_list)
    # edge_list
    #------------------------------------------------------------------------------

    edges_dists = pd.DataFrame(edge_list)
    #-----------------------------------------------------------------------------------------------
    trade_reporters = list(set(trade['Reporter']))
    trade_partners = list(set(trade['Partner']))
    flow = list(set(trade['Trade Flow']))

    imports_data = trade[trade['Trade Flow'] == 'Import'].reset_index(
        drop=True)
    reimports_data = trade[trade['Trade Flow'] == 'Re-Import'].reset_index(
        drop=True)
    exports_data = trade[trade['Trade Flow'] == 'Export'].reset_index(
        drop=True)
    reexports_data = trade[trade['Trade Flow'] == 'Re-Export'].reset_index(
        drop=True)

    imp_partners = imports_data['Partner']
    imp_reporters = imports_data['Reporter']
    imports_data['Partner'] = replace_(imp_partners, replace_dict)
    imports_data['Reporter'] = replace_(imp_reporters, replace_dict)

    exp_partners = exports_data['Partner']
    exp_reporters = exports_data['Reporter']
    exports_data['Partner'] = replace_(exp_partners, replace_dict)
    exports_data['Reporter'] = replace_(exp_reporters, replace_dict)

    #-----------------------------------------------------------------------------------------------

    # Diagnostic pass: count how many trade reporters/partners fall in common_countries
    i = 0
    reps = replace_(trade['Reporter'], replace_dict)
    pars = replace_(trade['Partner'], replace_dict)
    als = list(reps) + list(pars)
    cin = []
    cnot = []
    for c in als:
        if c in list(common_countries):
            cin.append(c)
            i += 1
        else:
            cnot.append(c)

    cin1 = []
    cnot1 = []
    for c in list(common_countries):
        if c in als:
            cin1.append(c)
        else:
            cnot1.append(c)

    i = 0
    cin = []
    cnot = []
    partns = replace_(trade['Partner'], replace_dict)
    for c in list(common_countries):
        if c in list(partns):
            cin.append(c)
            i += 1
        else:
            cnot.append(c)

    imports_data = imports_data[imports_data['Reporter'].isin(
        common_countries)].reset_index(drop=True)
    imports_data = imports_data[imports_data['Partner'].isin(
        common_countries)].reset_index(drop=True)

    exports_data = exports_data[exports_data['Reporter'].isin(
        common_countries)].reset_index(drop=True)
    exports_data = exports_data[exports_data['Partner'].isin(
        common_countries)].reset_index(drop=True)

    reimports_data = reimports_data[reimports_data['Reporter'].isin(
        common_countries)].reset_index(drop=True)
    reimports_data = reimports_data[reimports_data['Partner'].isin(
        common_countries)].reset_index(drop=True)

    reexports_data = reexports_data[reexports_data['Reporter'].isin(
        common_countries)].reset_index(drop=True)
    reexports_data = reexports_data[reexports_data['Partner'].isin(
        common_countries)].reset_index(drop=True)

    # Make edges and remove recurring ones
    iedges = make_directed_edges(imports_data, country_dict)
    iedges, iself_edges = check_cyclic_edges(iedges, remove_edges=True)

    eedges = make_directed_edges(exports_data, country_dict)
    eedges, eself_edges = check_cyclic_edges(eedges, remove_edges=True)

    # cdict = income_level_dict(c_income_group, country_dict)
    il_dict = income_level_dict(c_income_group, country_dict)

    inc = c_income_group.sort_values(by=['country'])
    labels = list(map(inc_levels_dict.get, inc['Income Group']))
    data = countries_attributes.sort_values(by=['country'])

    attr_names = data.iloc[:, 1:].columns
    attr_data = data.iloc[:, 1:].values
    attr_shape = attr_data.shape
    class_names = list(inc_levels)

    isrc, itar, iwei = zip(*iedges)  # Unzip import edges
    esrc, etar, ewei = zip(*eedges)  # Unzip export edges
    dsrc, dtar, dwei = zip(*edge_list)  #Unzip distance edges

    imat = csr_matrix((iwei, (isrc, itar))).todense()
    emat = csr_matrix((ewei, (esrc, etar))).todense()
    sparse_adj_dists = csr_matrix(
        (dwei, (dsrc, dtar)))  # Make sparse adjacency matrix for distances

    tmat = imat - emat  # Trade balance incidence matrix
    sparse_adj_trade = csr_matrix(
        tmat)  # Make sparse adjacency matrix for trade balance

    output_file = os.path.join(args.output_dir, "trade_savez_files")
    np.savez(output_file,
             attr_data=attr_data,
             attr_shape=attr_shape,
             sparse_adj_trade=sparse_adj_trade,
             sparse_adj_dists=sparse_adj_dists,
             labels=labels,
             class_names=class_names)

    print("Done!... Preprocessed data saved in ", args.output_dir)
    if args.makeplot:
        graph_outpath = "../images"
        makepath(graph_outpath)
        makeplot(edge_list, countries_attributes, country_dict, graph_outpath)
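
For completeness, a hypothetical sketch of reading the archive this script writes back in. The key names match the np.savez call above; allow_pickle=True is required on load because the scipy sparse matrices are stored as 0-d object arrays:

# Hypothetical consumer of the trade_savez_files.npz archive produced above.
import numpy as np

data = np.load("../data/processed/trade_savez_files.npz", allow_pickle=True)
attr_data = data["attr_data"]                       # scaled country attributes
sparse_adj_trade = data["sparse_adj_trade"].item()  # recover the csr_matrix
sparse_adj_dists = data["sparse_adj_dists"].item()
labels = data["labels"]
print(attr_data.shape, sparse_adj_trade.shape, list(data["class_names"]))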