import numpy as np
from algorithms.utils import _read_split, _class_split
from algorithms.sdae import sdae
from algorithms.clf_utils import _clf_dtree, _clf_svm, _clf_mlp
from sklearn.preprocessing import MinMaxScaler

# Implementation of the dual encoded features algorithm.
# Paper: http://www.sciencedirect.com/science/article/pii/S0031320316301303

trX, teX, trY, teY = _read_split("../datasets/nd-data/boundary.csv", read=1)

# from algorithms.utils import _read_dat
# trX, teX, trY, teY = _read_dat(
#     "dataset/page-blocks0.dat",skip=15,
#     read=1,oneHot=0)


def _sdae_module(X, Y, layer, batch_range):
    """Build the dual-encoded feature set for (X, Y).

    X is min-max scaled and encoded by two SDAEs (sigmoid and tanh
    activations).  Each encoding is paired column-wise with the labels,
    the two labelled encodings are stacked row-wise, shuffled in place,
    and returned as a (features, labels) pair.
    """
    X = MinMaxScaler().fit_transform(X)
    # sigmoid encoding first, then tanh -- same order as the stacking below
    encoded = [sdae(X, layer, batch_range, act) for act in ("sigmoid", "tanh")]
    stacked = np.vstack([np.column_stack((code, Y)) for code in encoded])
    np.random.shuffle(stacked)
    last_col = stacked.shape[1] - 1
    y = stacked[:, last_col]
    X_out = np.delete(stacked, last_col, axis=1)
    return X_out, y
import numpy as np from algorithms.sdae_s import sdae_syn from algorithms.utils import _read_split, _class_split from algorithms.clf_utils import _clf_dtree, _clf_svm, _clf_mlp, _clf_softmax from sklearn.preprocessing import MinMaxScaler #generate synthetic samples using deep sdae # from algorithms.utils import _read_dat # trX, teX, trY, teY = _read_dat( # "dataset/page-blocks0.dat",skip=15, # read=1,oneHot=0) trX, teX, trY, teY = _read_split( "../datasets/nd-data/kddcup2004-protein-homology-train.csv", read=1, oneHot=0) scaler = MinMaxScaler() trX = scaler.fit_transform(trX) teX = scaler.fit_transform(teX) X0, X1 = _class_split(trX, trY) print "smaller class shape", X1.shape print "Enter hidden layer for SDAE" layer_sdae = input() layer_sdae = layer_sdae + [X1.shape[1]] print "Enter oversampling percent" P = int(input())
import numpy as np from algorithms.sdae_s import sdae_syn from algorithms.utils import _read_split,_class_split from algorithms.clf_utils import _clf_dtree,_clf_svm,_clf_mlp,_clf_softmax from sklearn.preprocessing import MinMaxScaler #generate synthetic samples using deep sdae # from algorithms.utils import _read_dat # trX, teX, trY, teY = _read_dat( # "dataset/page-blocks0.dat",skip=15, # read=1,oneHot=0) trX, teX, trY, teY = _read_split( "../datasets/nd-data/kddcup2004-protein-homology-train.csv", read=1,oneHot=0) scaler=MinMaxScaler() trX=scaler.fit_transform(trX) teX=scaler.fit_transform(teX) X0,X1=_class_split(trX,trY) print "smaller class shape",X1.shape print "Enter hidden layer for SDAE" layer_sdae=input() layer_sdae=layer_sdae+[X1.shape[1]] print "Enter oversampling percent" P=int(input())
from algorithms.utils import _read_split, _class_split
from visual.plot import plot_data, plot_syn, plot_X
from algorithms.smote import SMOTE
import warnings

# Visual sanity checks for SMOTE oversampling on the segment dataset.

trX, teX, trY, teY = _read_split(
    "../datasets/nd-data/segment.csv",
    read=1, oneHot=0)

C0, C1 = _class_split(trX, trY)

warnings.filterwarnings("ignore", category=DeprecationWarning)

# oversample the minority class by 100% using k=5 nearest neighbours
syn = SMOTE(C1, 100, 5)

plot_data(trX, trY)   # inspect the raw training data
plot_syn(C1, syn)     # compare real vs synthetic minority samples
plot_X(C1)            # examine the minority class on its own
import numpy as np from sklearn.preprocessing import MinMaxScaler from algorithms.utils import _read_split, _class_split, _one_hot from algorithms.daego import DAEGO from algorithms.daf import DAF from algorithms.clf_utils import _clf_dtree, _clf_svm, _clf_mlp #improvement of daego method using stacked encoders #Flowchart #1. Transform to higher dimension using DAF #2. Generate Synthetic Samples using DAEGO #3. Transform back to original dimension trX, teX, trY, teY = _read_split("../datasets/nd-data/coil2000.csv", read=1, oneHot=0) scaler = MinMaxScaler() trX = scaler.fit_transform(trX) print "Enter oversampling percent" P = int(input()) X0, X1 = _class_split(trX, trY) print "X0 shape", X0.shape print "X1 shape", X1.shape print "Enter layer for DAF" layer = input() inp_shape = [trX.shape[1]] layer_daf = inp_shape + layer print "Enter batch Range for X0"
from sklearn.preprocessing import MinMaxScaler from algorithms.utils import _read_split,_class_split,_one_hot from algorithms.daego import DAEGO from algorithms.daf import DAF from algorithms.clf_utils import _clf_dtree,_clf_svm,_clf_mlp #improvement of daego method using stacked encoders #Flowchart #1. Transform to higher dimension using DAF #2. Generate Synthetic Samples using DAEGO #3. Transform back to original dimension trX, teX, trY, teY = _read_split("../datasets/nd-data/coil2000.csv",read=1,oneHot=0) scaler=MinMaxScaler() trX=scaler.fit_transform(trX) print "Enter oversampling percent" P=int(input()) X0,X1=_class_split(trX,trY) print "X0 shape",X0.shape print "X1 shape",X1.shape print "Enter layer for DAF" layer=input() inp_shape=[trX.shape[1]] layer_daf=inp_shape+layer print "Enter batch Range for X0" x0_batch=input()
from algorithms.utils import _read_split, _class_split
from visual.plot import plot_data, plot_syn, plot_X
from algorithms.smote import SMOTE
import warnings

# Visual sanity checks for SMOTE oversampling on the segment dataset.

trX, teX, trY, teY = _read_split(
    "../datasets/nd-data/segment.csv",
    read=1, oneHot=0)

C0, C1 = _class_split(trX, trY)

warnings.filterwarnings("ignore", category=DeprecationWarning)

# oversample the minority class by 100% using k=5 nearest neighbours
syn = SMOTE(C1, 100, 5)

plot_data(trX, trY)   # inspect the raw training data
plot_syn(C1, syn)     # compare real vs synthetic minority samples
plot_X(C1)            # examine the minority class on its own