Example #1
# Trains a binary-target model on a small fixture and checks that the
# number of predictions matches the number of test ids.
def test_binary(self):
    print(bin_train_df.head())
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(bin_train_df, target_column='y', id_column='id', numeric_columns=numeric_list, drop_columns=drop_list, categorical_columns=cat_list, num_training_rounds=100, target_type='binary', verbose=True, prefix='test', sample_fraction=0.3, n_samples=2)
    xgb.train()
    output = xgb.predict(bin_test_df)
    print('OUTPUT', output)
    self.assertTrue(len(output) == len(bin_test_dict['id']))
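
The fixtures referenced above (bin_train_df, bin_test_df, bin_test_dict) are defined elsewhere in the test module. A minimal sketch of what they might look like, assuming a tiny synthetic binary dataset; the column 'feat' and the row counts are illustrative, not taken from the original tests:

import pandas as pd
import numpy as np

# hypothetical fixtures: 'y' is a 0/1 target, 'feat' a numeric feature
n = 200
bin_train_df = pd.DataFrame({'id': range(n),
                             'feat': np.random.rand(n),
                             'y': np.random.randint(0, 2, n)})
bin_test_dict = {'id': range(50), 'feat': np.random.rand(50)}
bin_test_df = pd.DataFrame(bin_test_dict)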
Example #2
# random_sample should return n_samples independent subsamples, each holding
# round(fraction * len(train_df)) rows.
def test_random_sample(self):
    n_samples = 3
    fraction = 0.2
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(train_df, target_column='y', id_column='id', numeric_columns=numeric_list, drop_columns=drop_list, categorical_columns=cat_list, num_training_rounds=100, target_type='linear', verbose=True, prefix='test', sample_fraction=0.01, n_samples=2)
    samples = xgb.random_sample(train_df, fraction=fraction, n_samples=n_samples)
    self.assertTrue(len(samples) == n_samples)
    self.assertTrue(len(samples[0]) == round(fraction * len(train_df)))
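
The assertions imply that random_sample draws n_samples independent subsamples, each containing round(fraction * len(train_df)) rows. An equivalent sketch using plain pandas, mirroring the assertions above; this is a stand-in, not the library's own implementation:

# hypothetical stand-in for xgb.random_sample built on pandas.DataFrame.sample
samples = [train_df.sample(frac=fraction) for _ in range(n_samples)]
print(len(samples), [len(s) for s in samples])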
Example #3
# Trains a linear-target model, checks the combined prediction length, then
# checks that return_multi_outputs=True yields one output per sampled model.
def test_output(self):
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(train_df, target_column='y', id_column='id', numeric_columns=numeric_list, drop_columns=drop_list, categorical_columns=cat_list, num_training_rounds=100, target_type='linear', verbose=True, prefix='test', sample_fraction=0.3, n_samples=2)
    xgb.train()
    output = xgb.predict(test_df)
    print('OUTPUT', output)
    self.assertTrue(len(output) == len(test_dict['id']))
    multi_outputs = xgb.predict(test_df, return_multi_outputs=True)
    self.assertTrue(len(multi_outputs) == xgb.n_samples)
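
With return_multi_outputs=True the wrapper returns one prediction sequence per sampled model (n_samples of them). If a single combined prediction is wanted, averaging the per-model outputs is one option; this is a sketch of that idea, not part of the xgbmagic API:

import numpy as np

# average the n_samples per-model predictions into one ensemble prediction
ensemble_output = np.mean(np.asarray(multi_outputs), axis=0)
print(len(ensemble_output))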
Example #4
# Constructing the wrapper with the standard keyword arguments should
# return an xgbmagic.Xgb instance.
def test_init(self):
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(train_df,
                       target_column='y',
                       id_column='id',
                       numeric_columns=numeric_list,
                       drop_columns=drop_list,
                       categorical_columns=cat_list,
                       num_training_rounds=100,
                       target_type='linear',
                       verbose=True,
                       prefix='test',
                       sample_fraction=0.01,
                       n_samples=2)
    self.assertIsInstance(xgb, xgbmagic.Xgb)
Example #5
import xgbmagic
import pandas as pd
import pickle

# train and test sets taken from Kaggle's Santander Customer Satisfaction challenge
df = pd.read_csv('train.csv')

xgb = xgbmagic.Xgb(df,
                   target_column='TARGET',
                   id_column='ID',
                   categorical_columns=[],
                   num_training_rounds=1000,
                   target_type='binary',
                   early_stopping_rounds=50)
xgb.train()

test_df = pd.read_csv('test.csv')
print(xgb.feature_importance())
output = xgb.predict(test_df)
xgb.write_csv('output-xgbmagic.csv')
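
pickle is imported above but never used in this snippet; persisting the trained wrapper for later reuse could look like the following sketch. The file name is illustrative, and whether the fitted model pickles cleanly depends on xgbmagic internals:

# save the trained wrapper ...
with open('xgb-model.pkl', 'wb') as f:
    pickle.dump(xgb, f)

# ... and load it back later for prediction
with open('xgb-model.pkl', 'rb') as f:
    restored = pickle.load(f)
print(restored.predict(test_df))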
Example #6
import xgbmagic
import pandas as pd
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score

df = pd.read_csv(
    '/home/rlougee/Desktop/invitrodb_v2_enrichments/CTEW_aeid_100_invitrodbv2_20180918/CTEW_Results/CT-Enriched_FP_aeid_100_invitrodbv2_20180918.tsv',
    delimiter='\t')
# cast every column after the first two (id and target) to integer dtype
df.iloc[:, 2:] = df.iloc[:, 2:].astype(int)

# XGBoost rejects feature names containing '[', ']' or '<',
# so strip those characters from the column names
for i in df.columns:
    f = i.replace('[', '').replace(']', '').replace('<', '')
    if i != f:
        df = df.rename(columns={i: f})

# binary classification target
target_type = 'binary'

# declare which columns are numeric features and build the model
xgb = xgbmagic.Xgb(df.iloc[50:, :],
                   target_column='Hit_Call',
                   id_column='Dsstox_Compound_ID',
                   numeric_columns=df.columns[2:],
                   target_type=target_type)
xgb.train()
print(xgb.feature_importance())
print(xgb.predict(df.iloc[:50, :]))
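
StratifiedKFold, KFold and cross_val_score are imported above but never used. cross_val_score expects a scikit-learn estimator interface, which the Xgb wrapper does not appear to implement, but the splitter can still drive a manual cross-validation loop. A sketch under that assumption, reusing only the Xgb calls shown above; fold count and random_state are illustrative:

# 5-fold stratified cross-validation over the same DataFrame
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for fold, (train_idx, test_idx) in enumerate(skf.split(df, df['Hit_Call'])):
    fold_model = xgbmagic.Xgb(df.iloc[train_idx, :],
                              target_column='Hit_Call',
                              id_column='Dsstox_Compound_ID',
                              numeric_columns=df.columns[2:],
                              target_type=target_type)
    fold_model.train()
    preds = fold_model.predict(df.iloc[test_idx, :])
    print('fold', fold, 'predicted', len(preds), 'rows')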