return data.iloc[1:]

# Trial call of augment on `a` (defined outside this section).
ans = augment(a,
              ignore_index=1,
              times=1,
              std=0.1,
              sample_size=1000,
              add_initial=False)

#%%
# Augment the training set one row at a time and gather the results.

#dataset = gf.get_dataset()
dataset = X_train.copy()
length = len(dataset)

# DataFrame.append was removed in pandas 2.0 and re-copied the whole
# accumulator on every call; collect the pieces and concatenate once instead.
augmented_parts = []
for i in range(length):
    # Indexing iloc with a list keeps the row as a one-row DataFrame.
    row = dataset.iloc[[i]]
    # Put the matching label in front as column 'y' before augmenting.
    row.insert(0, 'y', y_train.iloc[i])
    ans = augment(row,
                  ignore_index=0,
                  times=1,
                  std=0.1,
                  sample_size=1000,
                  add_initial=False)
    augmented_parts.append(ans)

# pd.concat raises on an empty list, so fall back to the empty frame the
# original code produced when there were no rows to augment.
# NOTE(review): assumes augment returns a DataFrame - confirm.
new = pd.concat(augmented_parts) if augmented_parts else pd.DataFrame()
#%%
# NOTE(review): `row` carries a 'y' column that `dataset` does not have; if
# augment keeps it, the original rows end up with NaN in 'y' after this
# concat - confirm that gf.divide expects that layout.
dataset = pd.concat([dataset, new])
[X_train, X_test, y_train, y_test] = gf.divide(dataset)
print(gf.run_xgboost(X_train, y_train, X_test, y_test))

#a,b=augmenter(X_train,y_train,sd=0.1,col=1)


def get_index_combo(data, num):
    """Return ``gf.subsets`` applied to the top ``num`` entries by accuracy.

    ``data["accuracy"]`` is handed to ``gf.get_top_n`` together with ``num``;
    whatever that returns is passed straight to ``gf.subsets`` and the result
    is returned unchanged.
    """
    top_entries = gf.get_top_n(data["accuracy"], num)
    return gf.subsets(top_entries)


# For every combination of indices, augment the training data once per index
# in the combination, train/evaluate with xgboost and record the metrics.
index_list = get_index_combo(data, 7)

# DataFrame.append / Series.append were removed in pandas 2.0 (and copied the
# accumulated data on every call); gather the pieces in lists and build the
# pandas objects once per step instead.
result_rows = []
for i in index_list:
    print("Currently augmenting")
    print(i)
    # Start from the untouched training data, then add one augmented copy per
    # index in the current combination.
    X_parts, y_parts = [X_train.copy()], [y_train.copy()]
    for j in i:
        Xtr, ytr = augmenter(X_train.copy(), y_train.copy(), sd=0.1,
                             col=j)  #creating augmented values
        # NOTE(review): assumes augmenter returns a DataFrame for X - confirm.
        X_parts.append(Xtr)
        y_parts.append(pd.Series(ytr))
    Xt = pd.concat(X_parts)
    yt = pd.concat(y_parts)
    d = gf.run_xgboost(Xt, yt, X_test, y_test)
    d["combination"] = i
    # Store a shallow copy so a later change to `d` cannot alter saved rows.
    result_rows.append(dict(d))
    print(d["accuracy"])
    print("-------")

# One row per combination, indexed 0..n-1 like append(..., ignore_index=True).
data1 = pd.DataFrame(result_rows)
data1.to_csv("data/data3.csv")
# --- Example no. 3 ---
# 0
# Load the stored train/test split through gf.loadfile.
Xtr = gf.loadfile("Xtr")
Xte = gf.loadfile("Xte")
ytr = gf.loadfile("ytr")
yte = gf.loadfile("yte")
# `temp` comes from outside this section; it is converted to a list here.
temp = gf.convertstrtolist(temp)

# DataFrame.append / Series.append were removed in pandas 2.0; collect the
# original data plus one augmented copy per entry of `temp` and concatenate
# once after the loop.
X_parts, y_parts = [Xtr.copy()], [ytr.copy()]
print("Currently augmenting")
for j in temp:
    Xtr1,ytr1=augmenter(Xtr.copy(),ytr.copy(),sd=0.1,col=j) #creating augmented values
    # NOTE(review): assumes augmenter returns a DataFrame for X - confirm.
    X_parts.append(Xtr1)
    y_parts.append(pd.Series(ytr1))
Xt = pd.concat(X_parts)
yt = pd.concat(y_parts)
print("Augmenting Finished")

d = gf.run_xgboost(Xt,yt,Xte,yte)
print(d)

# The string below is a no-op expression statement kept as-is from the
# original; presumably the output of an earlier run.
"""
{'accuracy': 0.882377508399508, 'std': 0.012872278905955601, 'AUC': 0.7476636185875316}
"""

#%%

# Augment once per index 0..149 on top of the already augmented data and
# record the xgboost metrics of every run.
result_rows = []
for i in range(150):
    print(i)
    Xt1,yt1= augmenter(Xt.copy(),yt.copy(),0.2,i)
    d = gf.run_xgboost(Xt1,yt1,Xte,yte)
    # Store a shallow copy so a later change to `d` cannot alter saved rows.
    result_rows.append(dict(d))

# One row per run, indexed 0..149 like append(..., ignore_index=True).
data = pd.DataFrame(result_rows)
    
@author: adityavyas
"""

import global_functions as gf

import pandas as pd

# Unpack the four pieces returned by gf.get_dataset into the names used below.
X_train, X_test, y_train, y_test = gf.get_dataset()


def run150(df):
    """Pass ``df`` through ``gf.add_gaussian_index`` for indices 0..149.

    Every call receives the result of the previous one, so the returned
    object is the outcome of 150 chained calls, in increasing index order.
    """
    perturbed = df
    for position in range(150):
        perturbed = gf.add_gaussian_index(perturbed, position)
    return perturbed


# Grow the training set with increasingly perturbed copies of X_train and
# record the xgboost metrics after each addition.

# DataFrame.append was removed in pandas 2.0; collect one metrics record per
# round and build the frame once at the end.
result_rows = []

runnable = X_train.copy()
runnabletrain = y_train.copy()

for i in range(1, 10):
    # Round i applies run150 i times to a fresh copy of the training data.
    df = X_train.copy()
    for j in range(i):
        df = run150(df)
    # The accumulated data is needed for training in every round, so it is
    # still extended inside the loop.
    runnable = pd.concat([runnable, df])
    runnabletrain = pd.concat([runnabletrain, y_train.copy()])
    frame = gf.run_xgboost(runnable, runnabletrain, X_test, y_test)
    # Store a shallow copy so a later change to `frame` cannot alter saved rows.
    result_rows.append(dict(frame))
    print(frame)

# One row per round, indexed 0..8 like append(..., ignore_index=True).
data = pd.DataFrame(result_rows)