def test_binary(self):
    """Binary-target model trains and yields one prediction per test-set id."""
    print(bin_train_df.head())
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(
        bin_train_df,
        target_column='y',
        id_column='id',
        numeric_columns=numeric_list,
        drop_columns=drop_list,
        categorical_columns=cat_list,
        num_training_rounds=100,
        target_type='binary',
        verbose=True,
        prefix='test',
        sample_fraction=0.3,
        n_samples=2,
    )
    xgb.train()
    output = xgb.predict(bin_test_df)
    print('OUTPUT', output)
    # assertEqual reports both values on failure, unlike
    # assertTrue(len(a) == len(b)) which only says "False is not true".
    self.assertEqual(len(output), len(bin_test_dict['id']))
def test_random_sample(self):
    """random_sample returns n_samples frames of round(fraction * len) rows each."""
    n_samples = 3
    fraction = 0.2
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(
        train_df,
        target_column='y',
        id_column='id',
        numeric_columns=numeric_list,
        drop_columns=drop_list,
        categorical_columns=cat_list,
        num_training_rounds=100,
        target_type='linear',
        verbose=True,
        prefix='test',
        sample_fraction=0.01,
        n_samples=2,
    )
    # Explicit fraction/n_samples here override the constructor's values.
    samples = xgb.random_sample(train_df, fraction=fraction, n_samples=n_samples)
    # assertEqual gives a useful "X != Y" diff on failure, unlike assertTrue(x == y).
    self.assertEqual(len(samples), n_samples)
    self.assertEqual(len(samples[0]), round(fraction * len(train_df)))
def test_output(self):
    """predict yields one row per test id; multi-output mode yields n_samples outputs."""
    drop_list = []
    numeric_list = []
    cat_list = []
    xgb = xgbmagic.Xgb(
        train_df,
        target_column='y',
        id_column='id',
        numeric_columns=numeric_list,
        drop_columns=drop_list,
        categorical_columns=cat_list,
        num_training_rounds=100,
        target_type='linear',
        verbose=True,
        prefix='test',
        sample_fraction=0.3,
        n_samples=2,
    )
    xgb.train()
    output = xgb.predict(test_df)
    print('OUTPUT', output)
    # assertEqual reports both values on failure, unlike assertTrue(x == y).
    self.assertEqual(len(output), len(test_dict['id']))
    # With return_multi_outputs=True, one prediction set per trained sample.
    multi_outputs = xgb.predict(test_df, return_multi_outputs=True)
    self.assertEqual(len(multi_outputs), xgb.n_samples)
def test_init(self):
    """Constructing an Xgb wrapper produces an Xgb instance."""
    drops = []
    numerics = []
    categoricals = []
    model = xgbmagic.Xgb(
        train_df,
        target_column='y',
        id_column='id',
        numeric_columns=numerics,
        drop_columns=drops,
        categorical_columns=categoricals,
        num_training_rounds=100,
        target_type='linear',
        verbose=True,
        prefix='test',
        sample_fraction=0.01,
        n_samples=2,
    )
    self.assertIsInstance(model, xgbmagic.Xgb)
import xgbmagic
import pandas as pd
import pickle

# Train and test sets taken from Kaggle's Santander Customer Satisfaction challenge.
train_df = pd.read_csv('train.csv')
model = xgbmagic.Xgb(
    train_df,
    target_column='TARGET',
    id_column='ID',
    categorical_columns=[],
    num_training_rounds=1000,
    target_type='binary',
    early_stopping_rounds=50,
)
model.train()

test_df = pd.read_csv('test.csv')
print(model.feature_importance())
output = model.predict(test_df)
model.write_csv('output-xgbmagic.csv')
import xgbmagic
import pandas as pd
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score

# Chemotype-enrichment fingerprint table (tab-separated); first two columns are
# id/target, the rest are fingerprint bits cast to int below.
df = pd.read_csv(
    '/home/rlougee/Desktop/invitrodb_v2_enrichments/CTEW_aeid_100_invitrodbv2_20180918/CTEW_Results/CT-Enriched_FP_aeid_100_invitrodbv2_20180918.tsv',
    delimiter='\t')
df.iloc[:, 2:] = df.iloc[:, 2:].astype(int)

# Strip '[', ']' and '<' from column names in a single pass with a rename
# mapper, instead of the original per-column replace/rename loop.
_strip_chars = str.maketrans('', '', '[]<')
df = df.rename(columns=lambda c: c.translate(_strip_chars))

# classification model = binary
# NOTE(review): target_type is assigned but never passed to Xgb below —
# presumably 'binary' is the constructor default; confirm before relying on it.
target_type = 'binary'

# Last-50 rows held out for prediction; train on the remainder.
xgb = xgbmagic.Xgb(
    df.iloc[50:, :],
    target_column='Hit_Call',
    id_column='Dsstox_Compound_ID',
    numeric_columns=df.columns[2:],
)
xgb.train()
print(xgb.feature_importance())
print(xgb.predict(df.iloc[:50, :]))