from __future__ import print_function

from matplotlib import pyplot as plt
from matplotlib import colors as mcolors
import numpy as np
import Read_Data as RD

# Name -> colour lookup: matplotlib's base colours extended with the CSS4 names.
colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

# Alternative input file, kept for quick switching:
#dir = "wine-5-fold/wine-5-1tra.dat"
# NOTE(review): `dir` shadows the builtin of the same name. The name is kept
# because code outside this chunk may still reference it.
dir = "KSMOTE_IECON15_InputData.csv"
RD.Initialize_Data(dir)

# One figure per unordered pair of feature columns (i < j).
for i in range(0, RD.Num_Features):
    # j starts at i + 1, so i and j can never be equal; the former
    # `if i != j` guard was always true and has been dropped.
    for j in range(i + 1, RD.Num_Features):
        fig = plt.figure()
        # Stage-1 rows: blue-grey circles.
        p1 = plt.scatter(RD.Stage_1_Feature[:, i],
                         RD.Stage_1_Feature[:, j],
                         marker='o',
                         color='#539caf',
                         label='1',
                         s=10,
                         alpha=0.4)
        # Stage-2 rows: green plus signs, drawn slightly larger and more opaque.
        p2 = plt.scatter(RD.Stage_2_Feature[:, i],
                         RD.Stage_2_Feature[:, j],
                         marker='+',
                         color=colors["forestgreen"],
                         label='2',
                         s=20,
                         alpha=0.6)
# NOTE(review): this chunk opens partway through a loop body. The enclosing
# definition and the loop headers that bind `ii`, `k`, `z` and
# `initial_sample` are outside the visible source, so the nesting of the
# first few statements is reconstructed from statement order only -- confirm
# against the full file.
if ii not in nominal_feature:
    # Largest row index of `bounds` whose entry in column ii does not exceed
    # the sample's value for that feature.
    z[ii] = np.max(
        np.where(bounds[:, ii] <= initial_sample[ii])[0])
    # Presumably clamps the index when the value sits on the last edge:
    # with 100 bins there are 101 edges, so the index found above can be 100.
    if z[ii] > 99:
        z[ii] -= 1
else:
    # Feature listed in nominal_feature: the value is copied unchanged.
    z[ii] = initial_sample[ii]
# Write the converted vector into row k of `data`.
data[k, :] = z

# ---- Script section: load a data set and compute per-feature bin edges ----
file = 'High_IR_Data/shuttle-2_vs_5.dat'
# Text before the first '.' of the path.
name = file.split('.')[0]
print(name)
RD.Initialize_Data(file)
print('Number of Positive: ', RD.Num_positive)
print('Number of Negative: ', RD.Num_negative)
# Empty list: no column index is excluded from binning below.
nominal_feature = []
data = RD.get_feature()
# Sizes of the first and second axes of the feature array.
num_samples = data.shape[0]
num_features = data.shape[1]
num_bins = 100
# One column per feature, holding num_bins + 1 bin edges; columns whose index
# is in nominal_feature are left at zero.
bounds = np.zeros((num_bins + 1, num_features))
for i in range(num_features):
    if i not in nominal_feature:
        # np.histogram returns (counts, bin_edges); only the edges are kept.
        bounds[:, i] = np.histogram(data[:, i], bins=num_bins)[1]
nf = RD.get_negative_feature()
from __future__ import print_function

from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import colors as mcolors
import Read_Data as RD
import seaborn as sns

# Name -> colour lookup: start from matplotlib's base colours, then add the
# CSS4 names on top.
colors = dict(mcolors.BASE_COLORS)
colors.update(mcolors.CSS4_COLORS)

# Data set to load. Alternative kept for quick switching:
#file = 'shuttle-2_vs_5.dat'
file = 'abalone19.dat'
# Text before the first '.' of the file name.
name = file.partition('.')[0]

# Column 0 is declared nominal with the three values 'M', 'F' and 'I'.
RD.Initialize_Data(
    file,
    has_nominal=True,
    nominal_index=[0],
    nominal_value=['M', 'F', 'I'],
)
print('Number of Positive: ', RD.Num_positive)
print('Number of Negative: ', RD.Num_negative)

# Wrap the feature array in a DataFrame with one named column per feature.
# Generic column names used with another data set:
#df = pd.DataFrame(RD.Features, columns=['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9'])
df = pd.DataFrame(
    RD.Features,
    columns=[
        'Sex',
        'Length',
        'Diameter',
        'Height',
        'Whole_Weight',
        'Shucked_Weight',
        'Viscera_Weight',
        'Shell_Weight',
    ],
)

# Attach the labels as an extra column. Earlier variant:
#df['Label'] = pd.Series(RD.Labels, index=df.index)
df['Label'] = RD.Labels

# Disabled plot of two columns coloured by label:
#sns.FacetGrid(df, hue='Label').map(plt.scatter, 'Sex', 'Length')
# Number of train/test folds iterated over below.
Num_Cross_Folders = 5

# One slot per fold for each metric, initialised to zero.
# np.zeros states the intent directly; the former np.linspace(0, 0, n) gave
# the same float64 zero vector in a roundabout way.
# NOTE(review): the *_GAN arrays presumably hold the scores obtained with the
# oversampled training set -- confirm where they are filled in.
G_Mean = np.zeros(Num_Cross_Folders)
Sensitivity = np.zeros(Num_Cross_Folders)
Specificity = np.zeros(Num_Cross_Folders)
G_Mean_GAN = np.zeros(Num_Cross_Folders)
Sensitivity_GAN = np.zeros(Num_Cross_Folders)
Specificity_GAN = np.zeros(Num_Cross_Folders)

for j in range(Num_Cross_Folders):
    # Fold files are numbered from 1, hence j + 1.
    # Alternative data set kept for quick switching:
    # dir_train = "glass1-5-fold/glass1-5-" + str(j+1) + "tra.dat"
    # dir_test = "glass1-5-fold/glass1-5-" + str(j+1) + "tst.dat"
    dir_train = "page-blocks0-5-fold/page-blocks0-5-" + str(j + 1) + "tra.dat"
    dir_test = "page-blocks0-5-fold/page-blocks0-5-" + str(j + 1) + "tst.dat"

    # Load the training split of this fold.
    RD.Initialize_Data(dir_train)
    Train_Feature = RD.get_feature()
    # Labels are flattened to 1-D straight away.
    Train_Label = RD.get_label().ravel()

    print(Train_Feature.shape)
    print(Train_Label.size)

    # Disabled baseline classifier:
    # clf = svm.SVC(C=1, kernel='rbf', gamma= 0.2)
    # clf.fit(Train_Feature, Train_Label)

    # Build a generator from the positive-class rows only.
    Feature_samples = RD.get_positive_feature()
    G = GAN_Build(Feature_samples)
    # Request as many generated rows as the gap between the two class counts.
    # NOTE(review): the meaning of the third argument (6) is not visible
    # here -- check Over_Sampling.
    Sudo_Samples = Over_Sampling(G, RD.Num_negative - RD.Num_positive, 6)
    print(Sudo_Samples[0])
    print(Sudo_Samples[-1])

    # Append the generated rows below the real training rows.
    # NOTE(review): Train_Label is not extended in the visible part of the
    # loop; make sure matching labels are added before fitting.
    Train_Feature = np.concatenate((Train_Feature, Sudo_Samples))
# Single-vocabulary variant used with another data set:
#nominal_value = ['M', 'F', 'I']

# Columns 1, 2 and 3 are declared nominal; nominal_value carries one list of
# admissible string values per entry of nominal_index.
# NOTE(review): how Read_Data uses these values is not visible here.
nominal_index = [1, 2, 3]
nominal_value = [
    ['icmp', 'tcp', 'udp'],
    [
        'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard',
        'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i', 'efs', 'exec',
        'finger', 'ftp', 'ftp_data', 'gopher', 'hostnames', 'http',
        'http_443', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap',
        'link', 'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns',
        'netbios_ssn', 'netstat', 'nnsp', 'nntp', 'ntp_u', 'other',
        'pm_dump', 'pop_2', 'pop_3', 'printer', 'private', 'red_i',
        'remote_job', 'rje', 'shell', 'smtp', 'sql_net', 'ssh', 'sunrpc',
        'supdup', 'systat', 'telnet', 'tftp_u', 'time', 'tim_i', 'urh_i',
        'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50',
    ],
    ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF',
     'SH'],
]

# Plain load without nominal handling, kept for reference:
#RD.Initialize_Data(file)
RD.Initialize_Data(
    file,
    has_nominal=True,
    nominal_index=nominal_index,
    nominal_value=nominal_value,
)
print('Number of Positive: ', RD.Num_positive)
print('Number of Negative: ', RD.Num_negative)

# Column indices that are skipped when bin edges are computed below.
nominal_feature = [1, 2, 3, 6, 7, 8, 10, 11, 13, 14, 17, 18, 19, 20, 21]
# Setting used with another data set:
#nominal_feature = [0,1,2,3,4,5,6,7,8,9]

data = RD.get_feature()
# Sizes of the first two axes of the feature array.
num_samples, num_features = data.shape[:2]

num_bins = 100
# One column of num_bins + 1 bin edges per feature; skipped columns stay zero.
bounds = np.zeros((num_bins + 1, num_features))
for i in range(num_features):
    if i in nominal_feature:
        continue
    # np.histogram returns (counts, bin_edges); only the edges are stored.
    bounds[:, i] = np.histogram(data[:, i], bins=num_bins)[1]

nf = RD.get_negative_feature()