Example #1
0
from __future__ import print_function
from matplotlib import pyplot as plt
from matplotlib import colors as mcolors
import numpy as np
import Read_Data as RD

# Single colour lookup table: start from matplotlib's BASE_COLORS and overlay
# CSS4_COLORS on top (a CSS4 entry wins if both define the same key).
colors = dict(mcolors.BASE_COLORS)
colors.update(mcolors.CSS4_COLORS)

# Data file handed to Read_Data below; the commented line is an alternative.
#dir = "wine-5-fold/wine-5-1tra.dat"
dir = "KSMOTE_IECON15_InputData.csv"

# Load the file into the RD module; later code reads the RD.* attributes.
RD.Initialize_Data(dir)

# Draw one figure per unordered pair of feature columns (i < j), overlaying
# the rows of RD.Stage_1_Feature (circles) and RD.Stage_2_Feature (plus signs).
for i in range(0, RD.Num_Features):
    for j in range(i + 1, RD.Num_Features):
        # NOTE(review): j starts at i + 1, so this condition is always true.
        if i != j:
            fig = plt.figure()
            # Column i on the x axis, column j on the y axis; legend label '1'.
            p1 = plt.scatter(RD.Stage_1_Feature[:, i],
                             RD.Stage_1_Feature[:, j],
                             marker='o',
                             color='#539caf',
                             label='1',
                             s=10,
                             alpha=0.4)
            # Same two columns for the second group; legend label '2'.
            p2 = plt.scatter(RD.Stage_2_Feature[:, i],
                             RD.Stage_2_Feature[:, j],
                             marker='+',
                             color=colors["forestgreen"],
                             label='2',
                             s=20,
                             alpha=0.6)
            # NOTE(review): from here to the end of the loop body the code
            # uses ii, z, initial_sample and k, none of which is assigned
            # anywhere earlier in the visible file; nominal_feature, bounds
            # and data are only assigned further down. This looks like the
            # tail of a different script spliced in here - confirm before
            # running, as it raises NameError as written.
            if ii not in nominal_feature:
                # Largest row index of bounds[:, ii] whose value does not
                # exceed initial_sample[ii].
                z[ii] = np.max(
                    np.where(bounds[:, ii] <= initial_sample[ii])[0])
                # Presumably folds the top edge back into the last bin
                # (100 bins -> indices 0..99) - TODO confirm.
                if z[ii] > 99:
                    z[ii] -= 1
            else:
                # Columns listed in nominal_feature are copied unchanged.
                z[ii] = initial_sample[ii]
        data[k, :] = z


# Data set to load, and the part of its path before the first '.' as a name.
file = 'High_IR_Data/shuttle-2_vs_5.dat'
name = file.partition('.')[0]
print(name)

# Load the file into the RD module and report the class counts it exposes.
RD.Initialize_Data(file)

print('Number of Positive: ', RD.Num_positive)
print('Number of Negative: ', RD.Num_negative)

# No column is treated as nominal for this data set.
nominal_feature = []

data = RD.get_feature()
num_samples = data.shape[0]
num_features = data.shape[1]

# Each non-nominal column gets num_bins equal-width histogram bins, i.e.
# num_bins + 1 edges; the edges of column i are stored in bounds[:, i].
num_bins = 100
bounds = np.zeros(shape=(num_bins + 1, num_features))
for i in range(num_features):
    if i in nominal_feature:
        # Nominal columns keep their all-zero edge column.
        continue
    bounds[:, i] = np.histogram(data[:, i], bins=num_bins)[1]

nf = RD.get_negative_feature()
from __future__ import print_function
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import colors as mcolors
import Read_Data as RD
import seaborn as sns

# Single colour lookup table: start from matplotlib's BASE_COLORS and overlay
# CSS4_COLORS on top (a CSS4 entry wins if both define the same key).
colors = dict(mcolors.BASE_COLORS)
colors.update(mcolors.CSS4_COLORS)

# Data set to load, and the part of its name before the first '.'.
#file = 'shuttle-2_vs_5.dat'
file = 'abalone19.dat'
name = file.partition('.')[0]

# Column 0 is declared nominal with the categories 'M', 'F' and 'I'.
RD.Initialize_Data(
    file,
    has_nominal=True,
    nominal_index=[0],
    nominal_value=['M', 'F', 'I'])

print('Number of Positive: ', RD.Num_positive)
print('Number of Negative: ', RD.Num_negative)

# Wrap the loaded feature matrix in a DataFrame with one named column per
# feature, then attach the labels as an extra 'Label' column.
#df = pd.DataFrame(RD.Features, columns=['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9'])
df = pd.DataFrame(
    RD.Features,
    columns=['Sex', 'Length', 'Diameter', 'Height', 'Whole_Weight',
             'Shucked_Weight', 'Viscera_Weight', 'Shell_Weight'])
#df['Label'] = pd.Series(RD.Labels, index=df.index)
df['Label'] = RD.Labels

#sns.FacetGrid(df, hue='Label').map(plt.scatter, 'Sex', 'Length')
# Number of cross-validation folds; the fold loop iterates over this range.
Num_Cross_Folders = 5

# One float64 slot per fold for each metric, initialised to zero.
# np.zeros states that intent directly; the previous np.linspace(0, 0, n)
# produced the same array in a roundabout way.
# NOTE(review): nothing in the visible code writes to these arrays yet;
# presumably the plain names hold baseline results and the *_GAN names hold
# results with GAN over-sampling - confirm against the rest of the script.
G_Mean = np.zeros(Num_Cross_Folders)
Sensitivity = np.zeros(Num_Cross_Folders)
Specificity = np.zeros(Num_Cross_Folders)
G_Mean_GAN = np.zeros(Num_Cross_Folders)
Sensitivity_GAN = np.zeros(Num_Cross_Folders)
Specificity_GAN = np.zeros(Num_Cross_Folders)

# For every fold: load the training split, generate synthetic samples from
# the positive-class features and append them to the training features.
for j in range(Num_Cross_Folders):
    # Fold files are numbered from 1, hence j + 1.
    #        dir_train = "glass1-5-fold/glass1-5-" + str(j+1) + "tra.dat"
    #        dir_test = "glass1-5-fold/glass1-5-" + str(j+1) + "tst.dat"
    dir_train = "page-blocks0-5-fold/page-blocks0-5-{}tra.dat".format(j + 1)
    # NOTE(review): dir_test is built but not used in the visible part of
    # the loop.
    dir_test = "page-blocks0-5-fold/page-blocks0-5-{}tst.dat".format(j + 1)

    RD.Initialize_Data(dir_train)
    Train_Feature = RD.get_feature()
    # Flatten the labels to one dimension.
    Train_Label = RD.get_label().ravel()
    print(Train_Feature.shape)
    print(Train_Label.size)

    #    clf = svm.SVC(C=1, kernel='rbf', gamma= 0.2)
    #    clf.fit(Train_Feature, Train_Label)

    # GAN_Build and Over_Sampling are defined outside the visible source.
    # The requested sample count is the gap between the negative and the
    # positive class sizes; the meaning of the trailing 6 is not visible
    # here - TODO confirm.
    Feature_samples = RD.get_positive_feature()
    G = GAN_Build(Feature_samples)
    Sudo_Samples = Over_Sampling(G, RD.Num_negative - RD.Num_positive, 6)
    # Show the first and the last generated sample as a quick sanity check.
    print(Sudo_Samples[0])
    print(Sudo_Samples[-1])
    # NOTE(review): only the features are extended here; Train_Label is not
    # extended in the visible code - confirm this happens further down.
    Train_Feature = np.concatenate((Train_Feature, Sudo_Samples))
#nominal_value = ['M', 'F', 'I']

# Columns declared nominal, and for each of them (same order) the list of
# category strings passed to the loader.
nominal_index = [1, 2, 3]
nominal_value = [
    # categories for column 1
    ['icmp', 'tcp', 'udp'],
    # categories for column 2
    [
        'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime',
        'discard', 'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i',
        'efs', 'exec', 'finger', 'ftp', 'ftp_data', 'gopher',
        'hostnames', 'http', 'http_443', 'imap4', 'IRC', 'iso_tsap',
        'klogin', 'kshell', 'ldap', 'link', 'login', 'mtp',
        'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat', 'nnsp',
        'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2', 'pop_3',
        'printer', 'private', 'red_i', 'remote_job', 'rje', 'shell',
        'smtp', 'sql_net', 'ssh', 'sunrpc', 'supdup', 'systat',
        'telnet', 'tftp_u', 'time', 'tim_i', 'urh_i', 'urp_i',
        'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50'
    ],
    # categories for column 3
    ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF',
     'SH']
]

# NOTE(review): `file` is not assigned in this section; it carries whatever
# value was set earlier in the file - confirm it names the intended data set.
#RD.Initialize_Data(file)
RD.Initialize_Data(
    file,
    has_nominal=True,
    nominal_index=nominal_index,
    nominal_value=nominal_value)

print('Number of Positive: ', RD.Num_positive)
print('Number of Negative: ', RD.Num_negative)

# Column indices treated as nominal; these are skipped when bin edges are
# computed below.
nominal_feature = [1, 2, 3, 6, 7, 8, 10, 11, 13, 14, 17, 18, 19, 20, 21]
#nominal_feature = [0,1,2,3,4,5,6,7,8,9]

data = RD.get_feature()
num_samples = data.shape[0]
num_features = data.shape[1]

# Each non-nominal column gets num_bins equal-width histogram bins, i.e.
# num_bins + 1 edges; the edges of column i are stored in bounds[:, i].
num_bins = 100
bounds = np.zeros(shape=(num_bins + 1, num_features))
for i in range(num_features):
    if i in nominal_feature:
        # Nominal columns keep their all-zero edge column.
        continue
    bounds[:, i] = np.histogram(data[:, i], bins=num_bins)[1]

nf = RD.get_negative_feature()