def objective(params):
    """Black-box objective for hyper-parameter search.

    Trains an XGBooster on each of 4 folds with the given ``params`` and
    returns the negated mean MAP@10 (negated because the optimizer
    minimizes, while a higher MAP is better).
    """
    print(params)
    total_loss = 0

    # Fix: the original called Helper().get_kfold_data(4) inside the loop,
    # rebuilding all 4 folds on every iteration only to index fold k.
    # Compute the folds once and index them.
    # NOTE(review): assumes get_kfold_data(4) returns the same splits on
    # every call — confirm it is deterministic (otherwise the original was
    # also mixing folds between iterations).
    folds = Helper().get_kfold_data(4)

    for k in range(4):
        URM_train, URM_test, validation_data, test_data = folds[k]

        booster = XGBooster(URM_train, validation_data, HybridElasticNetICFUCF)
        booster.URM_test = URM_test
        # deepcopy so the booster cannot mutate the optimizer's params dict
        booster.fit(train_parameters=deepcopy(params))

        loss, _ = Evaluator(test_mode=True).evaluate_recommender_kfold(
            booster, test_data, sequential=True)
        total_loss += loss

    total_loss /= 4
    print("Map@10 k-fold score:", total_loss)

    # negated: the surrounding optimizer minimizes this value
    return -total_loss
# Fix: 'copy' was used below (copy.deepcopy) without being imported,
# which raised NameError at runtime. Imports regrouped stdlib-first.
import copy
import os
import resource
import sys

from options import Options
from xgbooster import XGBooster

if __name__ == '__main__':
    # parsing command-line options
    options = Options(sys.argv)

    # making output unbuffered
    if sys.version_info.major == 2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # load a pre-trained model for the COMPAS dataset
    xgb = XGBooster(
        options,
        from_model='../temp/compas_data/compas_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl')

    # encode it and save the encoding to another file
    xgb.encode()

    # independent copy so the two boosters can be queried separately
    xgb2 = copy.deepcopy(xgb)

    # read the benchmark samples to explain
    with open('../bench/fairml/compas/compas.samples', 'r') as fp:
        lines = fp.readlines()

    # timers
    ltimes = []
    vtimes = []
    ftimes = []
    etimes = []
# Stand-alone preprocessing mode: convert categorical features, then stop.
if options.preprocess_categorical:
    preprocess_dataset(options.files[0], options.preprocess_categorical_files)
    exit()

if options.files:
    xgb = None

    # optionally train a fresh booster from the given dataset file
    if options.train:
        data = Data(
            filename=options.files[0],
            mapfile=options.mapfile,
            separator=options.separator,
            use_categorical=options.use_categorical,
        )
        xgb = XGBooster(options, from_data=data)
        train_accuracy, test_accuracy, model = xgb.train()

    # read a sample from options.explain (comma-separated floats)
    if options.explain:
        options.explain = [
            float(v.strip()) for v in options.explain.split(',')
        ]

    if options.encode:
        # no booster trained above — load one from the model file instead
        if not xgb:
            xgb = XGBooster(options, from_model=options.files[0])

        # encode it and save the encoding to another file
        xgb.encode(test_on=options.explain)
import os
import resource
import sys

# Fix: Options was used below (Options(sys.argv)) but never imported,
# which raised NameError at runtime.
from options import Options
from xgbooster import XGBooster

if __name__ == '__main__':
    # parsing command-line options
    options = Options(sys.argv)

    # making output unbuffered
    if sys.version_info.major == 2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # load a pre-trained model for the Adult dataset
    xgb = XGBooster(
        options,
        from_model='temp/adult_data/adult_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
    )

    # encode it and save the encoding to another file
    xgb.encode()

    # read the benchmark samples to explain
    with open('../bench/anchor/adult/adult.samples', 'r') as fp:
        lines = fp.readlines()

    # timers
    atimes = []
    vtimes = []
    ftimes = []
    etimes = []
# Fix: both 'copy' (copy.deepcopy below) and 'Options' (Options(sys.argv))
# were used without being imported, raising NameError at runtime.
import copy
import os
import resource
import sys

from options import Options
from xgbooster import XGBooster

if __name__ == '__main__':
    # parsing command-line options
    options = Options(sys.argv)

    # making output unbuffered
    if sys.version_info.major == 2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # load a pre-trained model for the recidivism dataset
    xgb = XGBooster(
        options,
        from_model='temp/recidivism_data/recidivism_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
    )

    # encode it and save the encoding to another file
    xgb.encode()

    # independent copy so the two boosters can be queried separately
    xgb2 = copy.deepcopy(xgb)

    # read the benchmark samples to explain
    with open('../bench/anchor/recidivism/recidivism.samples', 'r') as fp:
        lines = fp.readlines()

    # timers
    stimes = []
    vtimes = []
    ftimes = []
def enumerate_all(options, xtype, xnum, smallest, usecld, usemhs, useumcs,
                  prefix):
    """Enumerate explanations for every unique sample of the spam10 benchmark.

    Configures ``options`` with the given enumeration preferences, explains
    each unique sample with a periodically-restarted XGBooster, and prints
    per-sample and aggregate statistics (time, oracle calls, explanation
    counts and sizes) tagged with ``prefix``.
    """
    # setting the right preferences
    options.xtype = xtype
    options.xnum = xnum
    options.reduce = 'lin'
    options.smallest = smallest
    options.usecld = usecld
    options.usemhs = usemhs
    options.useumcs = useumcs

    # reading all unique samples
    with open('../bench/nlp/spam/quant/spam10.samples', 'r') as fp:
        lines = fp.readlines()

    # timers and other variables
    times, calls = [], []
    xsize, exlen = [], []

    # doing everything incrementally is expensive;
    # let's restart the solver for every 10% of instances.
    # Fix: the original tested `i % (len(lines) / 10) == 0` — in Python 3
    # that chunk size is a float, making the modulo test unreliable, and it
    # raises ZeroDivisionError for fewer than 10 samples. Use integer
    # division with a floor of 1.
    chunk = max(1, len(lines) // 10)

    tested = set()
    for i, s in enumerate(lines):
        if i % chunk == 0:
            # creating a new XGBooster
            xgb = XGBooster(
                options,
                from_model='../temp/spam10_data/spam10_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
            )

            # encode it and save the encoding to another file
            xgb.encode()

        options.explain = [float(v.strip()) for v in s.split(',')]

        # skip samples already explained
        if tuple(options.explain) in tested:
            continue
        tested.add(tuple(options.explain))

        print(prefix, 'sample {0}: {1}'.format(i, ','.join(s.split(','))))

        # timing the explanation call (self + children CPU time)
        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
            resource.getrusage(resource.RUSAGE_SELF).ru_utime
        expls = xgb.explain(options.explain)
        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
            resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
        times.append(timer)

        print(prefix, 'expls:', expls)
        print(prefix, 'nof x:', len(expls))
        print(prefix, 'timex: {0:.2f}'.format(timer))
        print(prefix, 'calls:', xgb.x.calls)
        print(prefix, 'Msz x:', max([len(x) for x in expls]))
        print(prefix, 'msz x:', min([len(x) for x in expls]))
        print(
            prefix,
            'asz x: {0:.2f}'.format(sum([len(x) for x in expls]) / len(expls)))
        print('')

        calls.append(xgb.x.calls)
        xsize.append(sum([len(x) for x in expls]) / float(len(expls)))
        exlen.append(len(expls))

    print('')
    print('all samples:', len(lines))

    # reporting the time spent
    print('{0} total time: {1:.2f}'.format(prefix, sum(times)))
    print('{0} max time per instance: {1:.2f}'.format(prefix, max(times)))
    print('{0} min time per instance: {1:.2f}'.format(prefix, min(times)))
    print('{0} avg time per instance: {1:.2f}'.format(prefix, sum(times) / len(times)))
    print('{0} total oracle calls: {1}'.format(prefix, sum(calls)))
    print('{0} max oracle calls per instance: {1}'.format(prefix, max(calls)))
    print('{0} min oracle calls per instance: {1}'.format(prefix, min(calls)))
    print('{0} avg oracle calls per instance: {1:.2f}'.format(
        prefix, float(sum(calls)) / len(calls)))
    print('{0} avg number of explanations per instance: {1:.2f}'.format(
        prefix, float(sum(exlen)) / len(exlen)))
    print('{0} avg explanation size per instance: {1:.2f}'.format(
        prefix, float(sum(xsize)) / len(xsize)))
    print('')
if count > 1: nof_insts = min(int(count), len(insts)) else: nof_insts = min(int(len(insts) * count), len(insts)) print(f'considering {nof_insts} instances') base = os.path.splitext(os.path.basename(data))[0] mfile = 'temp/{0}/{0}_nbestim_{1}_maxdepth_{2}_testsplit_0.2.mod.pkl'.format( base, num, adepth) slog = open(f'results/smt/{base}.log', 'w') mlog = open(f'results/mx/{base}.log', 'w') # creating booster objects sxgb = XGBooster(soptions, from_model=mfile) sxgb.encode(test_on=None) mxgb = XGBooster(moptions, from_model=mfile) mxgb.encode(test_on=None) stimes = [] mtimes = [] mcalls = [] smem = [] mxmem = [] #with open("/tmp/texture.samples", 'r') as fp: # insts = [line.strip() for line in fp.readlines()] for i, inst in enumerate(insts): if i == nof_insts: