def generate_plots_for_df(df, gmms, path=""):
    """Plot up to ten samples of every attack class against normal traffic.

    For each protocol in ``model.protocol_types`` (matched by its position in
    that sequence) the rows of ``df`` are selected by attack class index and
    protocol index.  The first ten rows of the normal class and of every
    attack class that has at least one row for the protocol are handed to
    ``generate_plots`` together with that protocol's pair of mixture models.

    Args:
        df: DataFrame with ``attack``, ``protocol_type`` and ``difficulty``
            columns.  It is only read, never modified.
        gmms: Two-item sequence ``[normal, abnormal]``; each item is indexed
            by protocol position.
        path: Forwarded unchanged to ``generate_plots``.
    """
    # NOTE(review): 11 is assumed to be the position of the normal class in
    # model.attack_types -- confirm against model.attack_normal.
    normal_attack_index = 11
    sample_size = 10
    meta_columns = ['attack', 'difficulty', 'protocol_type']

    headers, _ = preprocessing.get_header_data()
    for column in ('protocol_type', 'attack', 'difficulty'):
        headers.remove(column)

    def _rows_for(attack_index, protocol_index):
        """Return the rows of ``df`` for one attack class and one protocol."""
        mask = ((df["attack"] == attack_index) &
                (df["protocol_type"] == protocol_index))
        # Boolean indexing already yields a new frame, so no deepcopy needed.
        return df[mask]

    def _feature_sample(rows):
        """Keep the first rows, drop the meta columns and renumber the index."""
        # ``axis`` is passed by keyword: newer pandas rejects it positionally.
        sample = rows.head(sample_size).drop(meta_columns, axis=1)
        # reset_index returns a new frame; its result has to be kept.
        return sample.reset_index(drop=True)

    for protocol_index, protocol_type in enumerate(model.protocol_types):
        gmm_normal = gmms[0][protocol_index]
        gmm_abnormal = gmms[1][protocol_index]
        df_normal = _feature_sample(
            _rows_for(normal_attack_index, protocol_index))

        for attack_index, attack_type in enumerate(model.attack_types):
            if attack_index == normal_attack_index:
                continue
            attack_rows = _rows_for(attack_index, protocol_index)
            if len(attack_rows) < 1:
                # Nothing to plot for this attack/protocol combination.
                continue

            generate_plots(_feature_sample(attack_rows),
                           df_normal,
                           headers,
                           [gmm_normal, gmm_abnormal],
                           attack_type,
                           path=path,
                           protcls_name=protocol_type)
def generate_plots_for_df(df, gmms, path=""):
    """Plot up to ten samples of every attack class against normal traffic.

    For each protocol in ``model.protocol_types`` (matched by its position in
    that sequence) the rows of ``df`` are selected by attack class index and
    protocol index.  The first ten rows of the normal class and of every
    attack class that has at least one row for the protocol are handed to
    ``generate_plots`` together with that protocol's pair of mixture models.

    Args:
        df: DataFrame with ``attack``, ``protocol_type`` and ``difficulty``
            columns.  It is only read, never modified.
        gmms: Two-item sequence ``[normal, abnormal]``; each item is indexed
            by protocol position.
        path: Forwarded unchanged to ``generate_plots``.
    """
    # NOTE(review): 11 is assumed to be the position of the normal class in
    # model.attack_types -- confirm against model.attack_normal.
    normal_attack_index = 11
    sample_size = 10
    meta_columns = ['attack', 'difficulty', 'protocol_type']

    headers, _ = preprocessing.get_header_data()
    for column in ('protocol_type', 'attack', 'difficulty'):
        headers.remove(column)

    def _rows_for(attack_index, protocol_index):
        """Return the rows of ``df`` for one attack class and one protocol."""
        mask = ((df["attack"] == attack_index) &
                (df["protocol_type"] == protocol_index))
        # Boolean indexing already yields a new frame, so no deepcopy needed.
        return df[mask]

    def _feature_sample(rows):
        """Keep the first rows, drop the meta columns and renumber the index."""
        # ``axis`` is passed by keyword: newer pandas rejects it positionally.
        sample = rows.head(sample_size).drop(meta_columns, axis=1)
        # reset_index returns a new frame; its result has to be kept.
        return sample.reset_index(drop=True)

    for protocol_index, protocol_type in enumerate(model.protocol_types):
        gmm_normal = gmms[0][protocol_index]
        gmm_abnormal = gmms[1][protocol_index]
        df_normal = _feature_sample(
            _rows_for(normal_attack_index, protocol_index))

        for attack_index, attack_type in enumerate(model.attack_types):
            if attack_index == normal_attack_index:
                continue
            attack_rows = _rows_for(attack_index, protocol_index)
            if len(attack_rows) < 1:
                # Nothing to plot for this attack/protocol combination.
                continue

            generate_plots(_feature_sample(attack_rows),
                           df_normal,
                           headers,
                           [gmm_normal, gmm_abnormal],
                           attack_type,
                           path=path,
                           protcls_name=protocol_type)
    # NOTE(review): Z, X, Y, ax and fig are not defined anywhere in the
    # enclosing function as visible here; these statements look like the tail
    # of a separate 3-D surface plotting routine -- confirm before relying on
    # them.
    print Z.shape
    # Debug output: two corner samples, then the sum of Z[x, y] for x and y
    # in range(400).
    s = 0
    print Z[0, 0]
    print Z[399, 399]
    for x in range(400):
        for y in range(400):
            s = s + Z[x, y]
    print s
    # Draw Z over (X, Y) as a surface with the "coolwarm" colormap and attach
    # a colour bar to the figure.
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap("coolwarm"), linewidth=0, antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)

    #    plt.savefig('3dgauss.png')
    #    plt.clf()

    plt.show()


if __name__ == "__main__":
    # Script entry point: fetch the header list, load the preprocessed
    # training and test sets, log the run title and call test().
    headers, attacks = preprocessing.get_header_data()
    # Remove the 'protocol_type', 'attack' and 'difficulty' entries from the
    # header list.
    headers.remove("protocol_type")
    headers.remove("attack")
    headers.remove("difficulty")

    # NOTE(review): none of the names bound here are passed to test(); they
    # become module-level globals, so other code may read them -- confirm
    # before removing any of these loads.
    df_training_20, df_training_full, gmms_20, gmms_full = preprocessing.get_preprocessed_training_data()
    df_test_20, df_test_full, gmms_test_20, gmms_test_full = preprocessing.get_preprocessed_test_data()

    # The title is only written to the debug log.
    title = "training20_only"
    logger.debug("#################################################")
    logger.debug(title)
    test()
                           # NOTE(review): the lines below are the trailing
                           # arguments of a call whose opening line is not
                           # visible here (presumably ``surf =
                           # ax.plot_surface(X,``) -- confirm against the
                           # original file.
                           Y,
                           Z,
                           rstride=1,
                           cstride=1,
                           cmap=plt.get_cmap('coolwarm'),
                           linewidth=0,
                           antialiased=True)
    # Attach a colour bar for the surface to the figure.
    fig.colorbar(surf, shrink=0.5, aspect=5)

    #    plt.savefig('3dgauss.png')
    #    plt.clf()

    plt.show()


if __name__ == '__main__':
    # Script entry point: fetch the header list, load the preprocessed
    # training and test sets, log the run title and call test().
    headers, attacks = preprocessing.get_header_data()
    # Remove the 'protocol_type', 'attack' and 'difficulty' entries from the
    # header list.
    headers.remove('protocol_type')
    headers.remove('attack')
    headers.remove('difficulty')

    # NOTE(review): none of the names bound here are passed to test(); they
    # become module-level globals, so other code may read them -- confirm
    # before removing any of these loads.
    df_training_20, df_training_full, gmms_20, gmms_full = preprocessing.get_preprocessed_training_data(
    )
    df_test_20, df_test_full, gmms_test_20, gmms_test_full = preprocessing.get_preprocessed_test_data(
    )

    # The title is only written to the debug log.
    title = "training20_only"
    logger.debug("#################################################")
    logger.debug(title)
    test()
def show_classes(preview_only=True):
    """Load the four NSL-KDD data files and print a preview or a summary.

    The files ``KDDTrain+_20Percent.txt``, ``KDDTrain+.txt``,
    ``KDDTest-21.txt`` and ``KDDTest+.txt`` are read from ``nslkdd/data``
    next to this module with ``model.load_dataframe``.

    Args:
        preview_only: When true (the default) the rows at positions 1 and 3
            of the first file are printed and the process exits.  This is
            what the function has always done: an unconditional ``exit()``
            used to make the summary below unreachable.  Pass ``False`` to
            print the per-attack summary instead.

    Raises:
        SystemExit: always, when ``preview_only`` is true.
    """
    import os
    import sys
    from nslkdd.data import model

    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'nslkdd', 'data')
    file_names = ['KDDTrain+_20Percent.txt', 'KDDTrain+.txt',
                  'KDDTest-21.txt', 'KDDTest+.txt']

    headers, _ = preprocessing.get_header_data()
    dfs = [model.load_dataframe(os.path.join(data_dir, file_name), headers)
           for file_name in file_names]

    if preview_only:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(dfs[0].iloc[[1, 3], :])
        sys.exit()

    # Distinct attack labels per file; labels seen only in KDDTest+ are
    # appended to those of the full training file.
    attacks = [list(set(frame['attack'])) for frame in dfs]
    training_attacks = set(attacks[1])
    only_in_test_data = [label for label in attacks[3]
                         if label not in training_attacks]
    total_test_set = attacks[1] + only_in_test_data
    print(total_test_set)

    # basic: row count of every attack label in every file
    for di, frame in enumerate(dfs):
        print("=====" + str(di) + "=======")
        total = 0
        for attack_type in total_test_set:
            matches = len(frame[frame['attack'] == attack_type])
            total = total + matches
            print(attack_type + " : " + str(matches))
        print("------------------")
        print("total : " + str(total))

    print("============================")
    # for tiddly: one table row per attack label and file; rows for labels
    # that are absent from a file get a background-colour prefix.
    df_names = ["Training_20", "Training_full", "Test_21", "Test_plus"]
    for attack_type in total_test_set:
        for di, frame in enumerate(dfs):
            # Boolean indexing returns a new frame, so no copy is required.
            subset = frame[frame['attack'] == attack_type]
            category_name = str(list(set(subset['protocol_type'])))
            perc = len(subset) / (len(frame) * 1.0) * 100.0
            count = (str(len(subset)) + " / " + str(len(frame)) +
                     " (" + "{0:.3f}%".format(perc) + ")")
            bg = "bgcolor(#cd5c5c): " if perc == 0 else " "
            print("| ! " + attack_type + " |" + bg + category_name + " |" +
                  bg + df_names[di] + " |" + bg + count + " |" + bg + " |")
def gen_plot(cproj, res, df, highlight_point, title):
    """Save the clustering diagnostic figures for one dataset.

    Three PNG files are written into the directory named by the module-level
    ``today``: ``<title>.png`` (a 4x4 overview grid), ``<title>_true_.png``
    (points coloured by true class) and ``<title>_prediction_.png`` (points
    coloured by the majority vote of their cluster: a cluster counts as
    normal when it holds at least as many normal as non-normal points).

    Args:
        cproj: Sequence of projected points; only ``p[0]`` and ``p[1]`` of
            each point are plotted.
        res: Cluster label of each point, indexable in step with ``cproj``.
        df: DataFrame whose ``attack`` column gives the true class index of
            each point, in the same order as ``cproj``.
        highlight_point: Class index drawn in yellow on the true-label
            figure, or ``None`` for no highlight.
        title: Base name of the output files.

    Raises:
        IndexError: if a label in ``res`` is not below the cluster count
            ``int(len(cproj) * 12 / 500.0)`` or a true class index is not
            below the number of attack names.
    """
    _, attacks = preprocessing.get_header_data()
    true_attack_types = df["attack"].values.tolist()

    point_count = len(cproj)
    xs = [point[0] for point in cproj]
    ys = [point[1] for point in cproj]
    labels = [res[i] for i in range(point_count)]

    # Points grouped by their true class index.
    data_per_true_labels = [[] for unused in range(len(attacks))]
    for i, point in enumerate(cproj):
        data_per_true_labels[true_attack_types[i]].append(point)

    # Per-cluster vote (+1 per normal point, -1 otherwise) and coordinates.
    k = int(point_count * 12 / 500.0)
    clusters = [0] * k
    cluster_xs = [[] for unused in range(k)]
    cluster_ys = [[] for unused in range(k)]
    for i, label in enumerate(labels):
        if true_attack_types[i] == model.attack_normal:
            clusters[label] = clusters[label] + 1
        else:
            clusters[label] = clusters[label] - 1
        cluster_xs[label].append(xs[i])
        cluster_ys[label].append(ys[i])

    logger.debug("* mean/std of cluster")
    nan = float("nan")
    for i in range(k):
        cx = cluster_xs[i]
        cy = cluster_ys[i]
        # Empty clusters are reported as nan without numpy's empty-slice
        # warnings.  The values are means, so the labels say so.
        logger.debug("cluster : " + str(i))
        logger.debug("- size [" + str(len(cx)) + "]")
        logger.debug("- xmean [" + str(np.mean(cx) if cx else nan) + "]")
        logger.debug("- ymean [" + str(np.mean(cy) if cy else nan) + "]")
        logger.debug("- xstd [" + str(np.std(cx) if cx else nan) + "]")
        logger.debug("- ystd [" + str(np.std(cy) if cy else nan) + "]")

    def scatter_selected(axes, selected, color):
        """Scatter the points at the positions in ``selected`` in one call."""
        if selected:
            axes.scatter([xs[j] for j in selected],
                         [ys[j] for j in selected], c=color)

    def new_single_axes():
        """Create a one-panel figure with the shared plot limits."""
        figure, axes = plt.subplots(1, 1, sharex='col', sharey='row')
        plt.subplots_adjust(wspace=0.4, hspace=0.4)
        plt.xlim(plot_lim_min, plot_lim_max)
        plt.ylim(plot_lim_min, plot_lim_max)
        return figure, axes

    def label_and_save(figure, heading, suffix):
        """Label the current axes, save the figure and close it."""
        plt.xlabel('Similarity score to normal')
        plt.ylabel('Similarity score to abnormal')
        plt.title(heading)
        plt.grid(True)
        figure.savefig(today + "/" + title + suffix)
        plt.close(figure)

    # ---- 4x4 overview grid -------------------------------------------------
    fig, axarr = plt.subplots(4, 4, sharex='col', sharey='row')
    plt.subplots_adjust(wspace=0.4, hspace=0.4)
    plt.xlim(plot_lim_min, plot_lim_max)
    plt.ylim(plot_lim_min, plot_lim_max)
    panels = axarr.ravel()  # row-major: panels[0] is the top-left axes

    plot_true_labels(panels[0], data_per_true_labels, "True labels",
                     highlight_point)
    plot_normal_label(panels[1], data_per_true_labels, "True normals")
    plot_abnormal_label(panels[2], data_per_true_labels, "True abnormal")

    panels[3].set_title("k-means")
    if point_count:
        panels[3].scatter(xs, ys,
                          c=[colorhex.codes[label] for label in labels])

    panels[4].set_title("Normal res")
    scatter_selected(
        panels[4],
        [i for i in range(point_count) if clusters[labels[i]] >= 0], 'g')

    panels[5].set_title("Abnormal res")
    scatter_selected(
        panels[5],
        [i for i in range(point_count) if clusters[labels[i]] < 0], 'r')

    # The remaining ten panels show clusters 0..9 one by one.
    for cluster_index, panel in enumerate(panels[6:16]):
        panel.set_title("Cluster " + str(cluster_index + 1))
        scatter_selected(
            panel,
            [i for i in range(point_count) if labels[i] == cluster_index],
            'g')

    fig.savefig(today + "/" + title + ".png")
    plt.close(fig)
    # Reported only once the file has actually been written.
    print(title + " has been saved")

    # ---- true labels -------------------------------------------------------
    fig, ax = new_single_axes()
    for class_index, points in enumerate(data_per_true_labels):
        is_normal = class_index == model.attack_normal
        if not is_normal and class_index == highlight_point:
            continue  # drawn last, in yellow
        px = np.array([t[0] for t in points])
        py = np.array([t[1] for t in points])
        ax.scatter(px, py, c=['g' if is_normal else 'r'] * len(px))
    if highlight_point is not None:
        points = data_per_true_labels[highlight_point]
        px = np.array([t[0] for t in points])
        py = np.array([t[1] for t in points])
        ax.scatter(px, py, c=['y'] * len(px))
    label_and_save(fig, 'True labels', "_true_.png")

    # ---- predicted labels --------------------------------------------------
    fig, ax = new_single_axes()
    if point_count:
        ax.scatter(xs, ys,
                   c=['g' if clusters[label] >= 0 else 'r'
                      for label in labels])
    label_and_save(fig, 'Predicted labels', "_prediction_.png")
def test():
    """Show a diagnostic plot of the normal class of "training20_only".

    The points returned by ``get_data`` whose true label equals
    ``model.attack_normal`` are clustered with 3-means.  The figure shows the
    cluster regions, the centroids (white crosses), the points (green), their
    mean (red) and median (blue), and the contour lines of an uncorrelated
    Gaussian with the points' mean and standard deviation.  The figure is
    displayed with ``plt.show()`` and then closed.
    """
    from sklearn.cluster import KMeans

    def gaussian_density(grid_x, grid_y, sigma_x, sigma_y, mu_x, mu_y):
        """Uncorrelated bivariate normal density on a grid.

        Replaces ``matplotlib.mlab.bivariate_normal``, which newer
        matplotlib releases no longer provide.
        """
        z = (((grid_x - mu_x) / sigma_x) ** 2 +
             ((grid_y - mu_y) / sigma_y) ** 2)
        return np.exp(-z / 2.0) / (2.0 * np.pi * sigma_x * sigma_y)

    _, attacks = preprocessing.get_header_data()
    cproj, _, df, _ = get_data("training20_only")

    fig, ax = plt.subplots(1, 1, sharex='col', sharey='row')
    plt.subplots_adjust(wspace=0.4, hspace=0.4)
    plt.xlim(plot_lim_min, plot_lim_max)
    plt.ylim(plot_lim_min, plot_lim_max)
    ax.set_title("plot")

    # Points grouped by their true class index.
    true_attack_types = df["attack"].values.tolist()
    data_per_true_labels = [[] for unused in range(len(attacks))]
    for i, point in enumerate(cproj):
        data_per_true_labels[true_attack_types[i]].append(point)

    for class_index, points in enumerate(data_per_true_labels):
        if class_index != model.attack_normal:
            continue  # only the normal class is drawn

        x = np.array([t[0] for t in points])
        y = np.array([t[1] for t in points])

        estimator = KMeans(init='k-means++', n_clusters=3)
        estimator.fit(points)
        centroids = estimator.cluster_centers_

        # NOTE(review): the grid is inset by 1 on every side (min + 1,
        # max - 1); the scikit-learn k-means demo this resembles pads
        # outwards instead -- confirm which is intended.
        step = .02
        xx, yy = np.meshgrid(np.arange(min(x) + 1, max(x) - 1, step),
                             np.arange(min(y) + 1, max(y) - 1, step))
        regions = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
        regions = regions.reshape(xx.shape)

        # Drawn once; the identical second imshow call added nothing.
        ax.imshow(regions,
                  interpolation='nearest',
                  extent=(xx.min(), xx.max(), yy.min(), yy.max()),
                  cmap=plt.cm.Paired,
                  aspect='auto',
                  origin='lower')
        ax.scatter(centroids[:, 0], centroids[:, 1],
                   marker='x', s=169, linewidths=3,
                   color='w', zorder=10)

        ax.scatter(x, y, c=['g'] * len(x))
        ax.scatter(np.mean(x), np.mean(y), c='r')
        ax.scatter(np.median(x), np.median(y), c='b')

        delta = 0.025
        grid = np.arange(plot_lim_min, plot_lim_max, delta)
        X, Y = np.meshgrid(grid, grid)
        Z = gaussian_density(X, Y, np.std(x), np.std(y),
                             np.mean(x), np.mean(y))
        ax.contour(X, Y, Z)

    plt.show()
    plt.close()
def kmean_plot(title, ax):
    """Draw the 3-means view of the predicted-normal points onto ``ax``.

    A cluster from ``get_data(title)`` counts as normal when it contains more
    points with the true label ``model.attack_normal`` than other points.
    The points of those clusters are re-clustered with 3-means and drawn on
    ``ax`` with the cluster regions, the centroids (white crosses), the
    points (green), their mean (red) and median (blue), and the contour lines
    of an uncorrelated Gaussian with the points' mean and standard deviation.

    Args:
        title: Dataset name passed to ``get_data``.
        ax: Matplotlib axes that receives every artist.  Everything is drawn
            on this axes rather than on whichever axes is current.
    """
    from sklearn.cluster import KMeans

    def gaussian_density(grid_x, grid_y, sigma_x, sigma_y, mu_x, mu_y):
        """Uncorrelated bivariate normal density on a grid.

        Replaces ``matplotlib.mlab.bivariate_normal``, which newer
        matplotlib releases no longer provide.
        """
        z = (((grid_x - mu_x) / sigma_x) ** 2 +
             ((grid_y - mu_y) / sigma_y) ** 2)
        return np.exp(-z / 2.0) / (2.0 * np.pi * sigma_x * sigma_y)

    cproj, res, df, _ = get_data(title)
    ax.figure.subplots_adjust(wspace=0.4, hspace=0.4)

    # Vote per cluster label: +1 for a normal point, -1 otherwise.  A dict
    # accepts any number of clusters.
    true_attack_types = df["attack"].values.tolist()
    votes = {}
    for i in range(len(cproj)):
        change = 1 if true_attack_types[i] == model.attack_normal else -1
        votes[res[i]] = votes.get(res[i], 0) + change

    # NOTE(review): the vote must be strictly positive here, while gen_plot
    # treats a tie (>= 0) as normal -- confirm which is intended.
    selected = [point for i, point in enumerate(cproj) if votes[res[i]] > 0]
    x = [point[0] for point in selected]
    y = [point[1] for point in selected]

    estimator = KMeans(init='k-means++', n_clusters=3)
    estimator.fit(selected)
    centroids = estimator.cluster_centers_

    # NOTE(review): the grid is inset by 1 on every side (min + 1, max - 1);
    # the scikit-learn k-means demo this resembles pads outwards instead --
    # confirm which is intended.
    step = .02
    xx, yy = np.meshgrid(np.arange(min(x) + 1, max(x) - 1, step),
                         np.arange(min(y) + 1, max(y) - 1, step))
    regions = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
    regions = regions.reshape(xx.shape)

    # Drawn once; the identical second imshow call added nothing.
    ax.imshow(regions,
              interpolation='nearest',
              extent=(xx.min(), xx.max(), yy.min(), yy.max()),
              cmap=plt.cm.Paired,
              aspect='auto',
              origin='lower')
    ax.scatter(centroids[:, 0], centroids[:, 1],
               marker='x', s=169, linewidths=3,
               color='w', zorder=10)

    ax.scatter(x, y, c=['g'] * len(x))
    ax.scatter(np.mean(x), np.mean(y), c='r')
    ax.scatter(np.median(x), np.median(y), c='b')

    delta = 0.025
    grid = np.arange(plot_lim_min, plot_lim_max, delta)
    X, Y = np.meshgrid(grid, grid)
    Z = gaussian_density(X, Y, np.std(x), np.std(y), np.mean(x), np.mean(y))
    ax.contour(X, Y, Z)
# Example #9
# 0
def show_classes():
    """Print per-attack row counts for the four NSL-KDD data files.

    The files ``KDDTrain+_20Percent.txt``, ``KDDTrain+.txt``,
    ``KDDTest-21.txt`` and ``KDDTest+.txt`` are read from ``nslkdd/data``
    next to this module with ``model.load_dataframe``.  Three things are
    printed: the attack labels found in each file and their combined list,
    the row count of every label in every file, and one table row per
    label and file (label, protocol types, file name, share of rows).
    """
    import os
    from nslkdd.data import model

    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'nslkdd', 'data')
    file_names = ['KDDTrain+_20Percent.txt', 'KDDTrain+.txt',
                  'KDDTest-21.txt', 'KDDTest+.txt']

    headers, _ = preprocessing.get_header_data()
    dfs = [model.load_dataframe(os.path.join(data_dir, file_name), headers)
           for file_name in file_names]

    # Distinct attack labels per file; labels seen only in KDDTest+ are
    # appended to those of the full training file.
    attacks = [list(set(frame['attack'])) for frame in dfs]
    training_attacks = set(attacks[1])
    only_in_test_data = [label for label in attacks[3]
                         if label not in training_attacks]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(attacks)
    total_test_set = attacks[1] + only_in_test_data
    print(total_test_set)

    # basic: row count of every attack label in every file
    for di, frame in enumerate(dfs):
        print("=====" + str(di) + "=======")
        total = 0
        for attack_type in total_test_set:
            matches = len(frame[frame['attack'] == attack_type])
            total = total + matches
            print(attack_type + " : " + str(matches))
        print("------------------")
        print("total : " + str(total))

    print("============================")
    # for tiddly: one table row per attack label and file; rows for labels
    # that are absent from a file get a background-colour prefix.
    df_names = ["Training_20", "Training_full", "Test_21", "Test_plus"]
    for attack_type in total_test_set:
        for di, frame in enumerate(dfs):
            # Boolean indexing returns a new frame, so no copy is required.
            subset = frame[frame['attack'] == attack_type]
            category_name = str(list(set(subset['protocol_type'])))
            perc = len(subset) / (len(frame) * 1.0) * 100.0
            count = (str(len(subset)) + " / " + str(len(frame)) +
                     " (" + "{0:.3f}%".format(perc) + ")")
            bg = "bgcolor(#cd5c5c): " if perc == 0 else " "
            print("| ! " + attack_type + " |" + bg + category_name + " |" +
                  bg + df_names[di] + " |" + bg + count + " |" + bg + " |")
def gen_plot(cproj, res, df, highlight_point, title):
    """Save the clustering diagnostic figures for one dataset.

    Three PNG files are written into the directory named by the module-level
    ``today``: ``<title>.png`` (a 4x4 overview grid), ``<title>_true_.png``
    (points coloured by true class) and ``<title>_prediction_.png`` (points
    coloured by the majority vote of their cluster: a cluster counts as
    normal when it holds at least as many normal as non-normal points).

    Args:
        cproj: Sequence of projected points; only ``p[0]`` and ``p[1]`` of
            each point are plotted.
        res: Cluster label of each point, indexable in step with ``cproj``.
        df: DataFrame whose ``attack`` column gives the true class index of
            each point, in the same order as ``cproj``.
        highlight_point: Class index drawn in yellow on the true-label
            figure, or ``None`` for no highlight.
        title: Base name of the output files.

    Raises:
        IndexError: if a label in ``res`` is not below the cluster count
            ``int(len(cproj) * 12 / 500.0)`` or a true class index is not
            below the number of attack names.
    """
    _, attacks = preprocessing.get_header_data()
    true_attack_types = df["attack"].values.tolist()

    point_count = len(cproj)
    xs = [point[0] for point in cproj]
    ys = [point[1] for point in cproj]
    labels = [res[i] for i in range(point_count)]

    # Points grouped by their true class index.
    data_per_true_labels = [[] for unused in range(len(attacks))]
    for i, point in enumerate(cproj):
        data_per_true_labels[true_attack_types[i]].append(point)

    # Per-cluster vote (+1 per normal point, -1 otherwise) and coordinates.
    k = int(point_count * 12 / 500.0)
    clusters = [0] * k
    cluster_xs = [[] for unused in range(k)]
    cluster_ys = [[] for unused in range(k)]
    for i, label in enumerate(labels):
        if true_attack_types[i] == model.attack_normal:
            clusters[label] = clusters[label] + 1
        else:
            clusters[label] = clusters[label] - 1
        cluster_xs[label].append(xs[i])
        cluster_ys[label].append(ys[i])

    logger.debug("* mean/std of cluster")
    nan = float("nan")
    for i in range(k):
        cx = cluster_xs[i]
        cy = cluster_ys[i]
        # Empty clusters are reported as nan without numpy's empty-slice
        # warnings.  The values are means, so the labels say so.
        logger.debug("cluster : " + str(i))
        logger.debug("- size [" + str(len(cx)) + "]")
        logger.debug("- xmean [" + str(np.mean(cx) if cx else nan) + "]")
        logger.debug("- ymean [" + str(np.mean(cy) if cy else nan) + "]")
        logger.debug("- xstd [" + str(np.std(cx) if cx else nan) + "]")
        logger.debug("- ystd [" + str(np.std(cy) if cy else nan) + "]")

    def scatter_selected(axes, selected, color):
        """Scatter the points at the positions in ``selected`` in one call."""
        if selected:
            axes.scatter([xs[j] for j in selected],
                         [ys[j] for j in selected], c=color)

    def new_single_axes():
        """Create a one-panel figure with the shared plot limits."""
        figure, axes = plt.subplots(1, 1, sharex='col', sharey='row')
        plt.subplots_adjust(wspace=0.4, hspace=0.4)
        plt.xlim(plot_lim_min, plot_lim_max)
        plt.ylim(plot_lim_min, plot_lim_max)
        return figure, axes

    def label_and_save(figure, heading, suffix):
        """Label the current axes, save the figure and close it."""
        plt.xlabel('Similarity score to normal')
        plt.ylabel('Similarity score to abnormal')
        plt.title(heading)
        plt.grid(True)
        figure.savefig(today + "/" + title + suffix)
        plt.close(figure)

    # ---- 4x4 overview grid -------------------------------------------------
    fig, axarr = plt.subplots(4, 4, sharex='col', sharey='row')
    plt.subplots_adjust(wspace=0.4, hspace=0.4)
    plt.xlim(plot_lim_min, plot_lim_max)
    plt.ylim(plot_lim_min, plot_lim_max)
    panels = axarr.ravel()  # row-major: panels[0] is the top-left axes

    plot_true_labels(panels[0], data_per_true_labels, "True labels",
                     highlight_point)
    plot_normal_label(panels[1], data_per_true_labels, "True normals")
    plot_abnormal_label(panels[2], data_per_true_labels, "True abnormal")

    panels[3].set_title("k-means")
    if point_count:
        panels[3].scatter(xs, ys,
                          c=[colorhex.codes[label] for label in labels])

    panels[4].set_title("Normal res")
    scatter_selected(
        panels[4],
        [i for i in range(point_count) if clusters[labels[i]] >= 0], 'g')

    panels[5].set_title("Abnormal res")
    scatter_selected(
        panels[5],
        [i for i in range(point_count) if clusters[labels[i]] < 0], 'r')

    # The remaining ten panels show clusters 0..9 one by one.
    for cluster_index, panel in enumerate(panels[6:16]):
        panel.set_title("Cluster " + str(cluster_index + 1))
        scatter_selected(
            panel,
            [i for i in range(point_count) if labels[i] == cluster_index],
            'g')

    fig.savefig(today + "/" + title + ".png")
    plt.close(fig)
    # Reported only once the file has actually been written.
    print(title + " has been saved")

    # ---- true labels -------------------------------------------------------
    fig, ax = new_single_axes()
    for class_index, points in enumerate(data_per_true_labels):
        is_normal = class_index == model.attack_normal
        if not is_normal and class_index == highlight_point:
            continue  # drawn last, in yellow
        px = np.array([t[0] for t in points])
        py = np.array([t[1] for t in points])
        ax.scatter(px, py, c=['g' if is_normal else 'r'] * len(px))
    if highlight_point is not None:
        points = data_per_true_labels[highlight_point]
        px = np.array([t[0] for t in points])
        py = np.array([t[1] for t in points])
        ax.scatter(px, py, c=['y'] * len(px))
    label_and_save(fig, 'True labels', "_true_.png")

    # ---- predicted labels --------------------------------------------------
    fig, ax = new_single_axes()
    if point_count:
        ax.scatter(xs, ys,
                   c=['g' if clusters[label] >= 0 else 'r'
                      for label in labels])
    label_and_save(fig, 'Predicted labels', "_prediction_.png")
def test():
    """Show a diagnostic plot of the normal class of "training20_only".

    The points returned by ``get_data`` whose true label equals
    ``model.attack_normal`` are clustered with 3-means.  The figure shows the
    cluster regions, the centroids (white crosses), the points (green), their
    mean (red) and median (blue), and the contour lines of an uncorrelated
    Gaussian with the points' mean and standard deviation.  The figure is
    displayed with ``plt.show()`` and then closed.
    """
    from sklearn.cluster import KMeans

    def gaussian_density(grid_x, grid_y, sigma_x, sigma_y, mu_x, mu_y):
        """Uncorrelated bivariate normal density on a grid.

        Replaces ``matplotlib.mlab.bivariate_normal``, which newer
        matplotlib releases no longer provide.
        """
        z = (((grid_x - mu_x) / sigma_x) ** 2 +
             ((grid_y - mu_y) / sigma_y) ** 2)
        return np.exp(-z / 2.0) / (2.0 * np.pi * sigma_x * sigma_y)

    _, attacks = preprocessing.get_header_data()
    cproj, _, df, _ = get_data("training20_only")

    fig, ax = plt.subplots(1, 1, sharex='col', sharey='row')
    plt.subplots_adjust(wspace=0.4, hspace=0.4)
    plt.xlim(plot_lim_min, plot_lim_max)
    plt.ylim(plot_lim_min, plot_lim_max)
    ax.set_title("plot")

    # Points grouped by their true class index.
    true_attack_types = df["attack"].values.tolist()
    data_per_true_labels = [[] for unused in range(len(attacks))]
    for i, point in enumerate(cproj):
        data_per_true_labels[true_attack_types[i]].append(point)

    for class_index, points in enumerate(data_per_true_labels):
        if class_index != model.attack_normal:
            continue  # only the normal class is drawn

        x = np.array([t[0] for t in points])
        y = np.array([t[1] for t in points])

        estimator = KMeans(init='k-means++', n_clusters=3)
        estimator.fit(points)
        centroids = estimator.cluster_centers_

        # NOTE(review): the grid is inset by 1 on every side (min + 1,
        # max - 1); the scikit-learn k-means demo this resembles pads
        # outwards instead -- confirm which is intended.
        step = .02
        xx, yy = np.meshgrid(np.arange(min(x) + 1, max(x) - 1, step),
                             np.arange(min(y) + 1, max(y) - 1, step))
        regions = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
        regions = regions.reshape(xx.shape)

        # Drawn once; the identical second imshow call added nothing.
        ax.imshow(regions,
                  interpolation='nearest',
                  extent=(xx.min(), xx.max(), yy.min(), yy.max()),
                  cmap=plt.cm.Paired,
                  aspect='auto',
                  origin='lower')
        ax.scatter(centroids[:, 0], centroids[:, 1],
                   marker='x', s=169, linewidths=3,
                   color='w', zorder=10)

        ax.scatter(x, y, c=['g'] * len(x))
        ax.scatter(np.mean(x), np.mean(y), c='r')
        ax.scatter(np.median(x), np.median(y), c='b')

        delta = 0.025
        grid = np.arange(plot_lim_min, plot_lim_max, delta)
        X, Y = np.meshgrid(grid, grid)
        Z = gaussian_density(X, Y, np.std(x), np.std(y),
                             np.mean(x), np.mean(y))
        ax.contour(X, Y, Z)

    plt.show()
    plt.close()
def kmean_plot(title, ax):
    """Draw the 3-means view of the predicted-normal points onto ``ax``.

    A cluster from ``get_data(title)`` counts as normal when it contains more
    points with the true label ``model.attack_normal`` than other points.
    The points of those clusters are re-clustered with 3-means and drawn on
    ``ax`` with the cluster regions, the centroids (white crosses), the
    points (green), their mean (red) and median (blue), and the contour lines
    of an uncorrelated Gaussian with the points' mean and standard deviation.

    Args:
        title: Dataset name passed to ``get_data``.
        ax: Matplotlib axes that receives every artist.  Everything is drawn
            on this axes rather than on whichever axes is current.
    """
    from sklearn.cluster import KMeans

    def gaussian_density(grid_x, grid_y, sigma_x, sigma_y, mu_x, mu_y):
        """Uncorrelated bivariate normal density on a grid.

        Replaces ``matplotlib.mlab.bivariate_normal``, which newer
        matplotlib releases no longer provide.
        """
        z = (((grid_x - mu_x) / sigma_x) ** 2 +
             ((grid_y - mu_y) / sigma_y) ** 2)
        return np.exp(-z / 2.0) / (2.0 * np.pi * sigma_x * sigma_y)

    cproj, res, df, _ = get_data(title)
    ax.figure.subplots_adjust(wspace=0.4, hspace=0.4)

    # Vote per cluster label: +1 for a normal point, -1 otherwise.  A dict
    # accepts any number of clusters.
    true_attack_types = df["attack"].values.tolist()
    votes = {}
    for i in range(len(cproj)):
        change = 1 if true_attack_types[i] == model.attack_normal else -1
        votes[res[i]] = votes.get(res[i], 0) + change

    # NOTE(review): the vote must be strictly positive here, while gen_plot
    # treats a tie (>= 0) as normal -- confirm which is intended.
    selected = [point for i, point in enumerate(cproj) if votes[res[i]] > 0]
    x = [point[0] for point in selected]
    y = [point[1] for point in selected]

    estimator = KMeans(init='k-means++', n_clusters=3)
    estimator.fit(selected)
    centroids = estimator.cluster_centers_

    # NOTE(review): the grid is inset by 1 on every side (min + 1, max - 1);
    # the scikit-learn k-means demo this resembles pads outwards instead --
    # confirm which is intended.
    step = .02
    xx, yy = np.meshgrid(np.arange(min(x) + 1, max(x) - 1, step),
                         np.arange(min(y) + 1, max(y) - 1, step))
    regions = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
    regions = regions.reshape(xx.shape)

    # Drawn once; the identical second imshow call added nothing.
    ax.imshow(regions,
              interpolation='nearest',
              extent=(xx.min(), xx.max(), yy.min(), yy.max()),
              cmap=plt.cm.Paired,
              aspect='auto',
              origin='lower')
    ax.scatter(centroids[:, 0], centroids[:, 1],
               marker='x', s=169, linewidths=3,
               color='w', zorder=10)

    ax.scatter(x, y, c=['g'] * len(x))
    ax.scatter(np.mean(x), np.mean(y), c='r')
    ax.scatter(np.median(x), np.median(y), c='b')

    delta = 0.025
    grid = np.arange(plot_lim_min, plot_lim_max, delta)
    X, Y = np.meshgrid(grid, grid)
    Z = gaussian_density(X, Y, np.std(x), np.std(y), np.mean(x), np.mean(y))
    ax.contour(X, Y, Z)