if tuple(options.explain) in tested: continue tested.add(tuple(options.explain)) print('sample {0}: {1}'.format(i, ','.join(s.split(',')))) # first, compute a valid explanation to guarantee # that an explanation of this size exists expl = xgb2.explain(options.explain) # calling lime timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \ resource.getrusage(resource.RUSAGE_SELF).ru_utime expl = xgb.explain(options.explain, use_lime=lime_call, nof_feats=len(expl)) print('expl1:', expl) print('szex1:', len(expl)) timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \ resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer ltimes.append(timer) # validating explanation of lime timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \ resource.getrusage(resource.RUSAGE_SELF).ru_utime coex = xgb.validate(options.explain, expl) timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
errors = [] reduced = 0 for i, s in enumerate(lines): options.explain = [float(v.strip()) for v in s.split(',')] if tuple(options.explain) in tested: continue tested.add(tuple(options.explain)) print('sample {0}: {1}'.format(i, ','.join(s.split(',')))) # calling anchor timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \ resource.getrusage(resource.RUSAGE_SELF).ru_utime expl = xgb.explain(options.explain, use_anchor=anchor_call) print('expl1:', expl) print('szex1:', len(expl)) timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \ resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer atimes.append(timer) # validating explanation of anchor timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \ resource.getrusage(resource.RUSAGE_SELF).ru_utime coex = xgb.validate(options.explain, expl) timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
# Excerpt of the command-line driver; the original line breaks and indentation
# were lost, so everything below sits on one physical line.
# Visible steps, in order:
#   * build an XGBooster from `data` and call train(), which yields
#     (train_accuracy, test_accuracy, model);
#   * if options.explain is set, parse it from a comma-separated string into a
#     list of floats;
#   * if options.encode is set, create the booster from the model file
#     options.files[0] when none exists yet, then encode it, passing the sample
#     as test_on;
#   * if a sample is given, create the booster from the encoding file
#     options.files[0] when none exists yet, then explain it with
#     explain_ortools() when options.encode == "ortools" and with explain()
#     otherwise; when options.xnum != 1 only the first returned explanation is
#     kept.
# NOTE(review): the exact nesting of these statements (and the condition that
# guards the training step) cannot be recovered from this flattened text --
# confirm against the original file.
xgb = XGBooster(options, from_data=data) train_accuracy, test_accuracy, model = xgb.train() # read a sample from options.explain if options.explain: options.explain = [ float(v.strip()) for v in options.explain.split(',') ] if options.encode: if not xgb: xgb = XGBooster(options, from_model=options.files[0]) # encode it and save the encoding to another file xgb.encode(test_on=options.explain) if options.explain: if not xgb: # abduction-based approach requires an encoding xgb = XGBooster(options, from_encoding=options.files[0]) if (options.encode == "ortools"): expl = xgb.explain_ortools(options.explain) else: # explain using anchor or the abduction-based approach expl = xgb.explain(options.explain) # here we take only the first explanation in case enumeration was done if options.xnum != 1: expl = expl[0]
def enumerate_all(options, xtype, xnum, smallest, usecld, usemhs, useumcs,
                  prefix):
    """
    Enumerate explanations for every distinct sample of the spam10
    benchmark and print per-sample and aggregate statistics.

    Parameters:
        options: options object shared with XGBooster. Its ``xtype``,
            ``xnum``, ``reduce``, ``smallest``, ``usecld``, ``usemhs``,
            ``useumcs`` and ``explain`` attributes are overwritten here.
        xtype, xnum, smallest, usecld, usemhs, useumcs: values copied
            verbatim onto ``options`` before any explainer is created
            (``options.reduce`` is always set to ``'lin'``).
        prefix: label printed in front of every reported line.

    Returns:
        None. Everything is reported on standard output.

    Raises:
        OSError: if the samples file cannot be opened.
        ValueError: if a sample line contains a non-numeric field.
    """

    def cpu_time():
        # user CPU time of this process plus its (waited-for) children
        return resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
            resource.getrusage(resource.RUSAGE_SELF).ru_utime

    # setting the right preferences
    options.xtype = xtype
    options.xnum = xnum
    options.reduce = 'lin'
    options.smallest = smallest
    options.usecld = usecld
    options.usemhs = usemhs
    options.useumcs = useumcs

    # reading all unique samples
    with open('../bench/nlp/spam/quant/spam10.samples', 'r') as fp:
        lines = fp.readlines()

    # timers and other variables
    times, calls = [], []
    xsize, exlen = [], []

    # doing everything incrementally is expensive;
    # let's restart the solver for every 10% of instances
    #
    # floor division keeps the period an integer: with true division the
    # modulo test below compares against a float and, for most input
    # sizes, is only ever satisfied at i == 0, so the solver would never
    # be restarted; max() keeps the period positive for tiny inputs
    restart_every = max(1, len(lines) // 10)

    xgb = None
    tested = set()
    for i, s in enumerate(lines):
        if i % restart_every == 0:
            # creating a new XGBooster
            xgb = XGBooster(
                options,
                from_model=
                '../temp/spam10_data/spam10_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
            )

            # encode it and save the encoding to another file
            xgb.encode()

        options.explain = [float(v.strip()) for v in s.split(',')]

        # every distinct sample is explained only once
        sample = tuple(options.explain)
        if sample in tested:
            continue
        tested.add(sample)

        print(prefix, 'sample {0}: {1}'.format(i, ','.join(s.split(','))))

        # enumerating explanations and timing the call
        started = cpu_time()
        expls = xgb.explain(options.explain)
        timer = cpu_time() - started
        times.append(timer)

        # explanation sizes are needed several times below
        sizes = [len(x) for x in expls]
        if sizes:
            largest, least = max(sizes), min(sizes)
            average = sum(sizes) / float(len(sizes))
        else:
            # no explanation was returned; report zeros instead of
            # failing on max()/min() of an empty list
            largest, least, average = 0, 0, 0.0

        print(prefix, 'expls:', expls)
        print(prefix, 'nof x:', len(expls))
        print(prefix, 'timex: {0:.2f}'.format(timer))
        print(prefix, 'calls:', xgb.x.calls)
        print(prefix, 'Msz x:', largest)
        print(prefix, 'msz x:', least)
        print(prefix, 'asz x: {0:.2f}'.format(average))
        print('')

        calls.append(xgb.x.calls)
        xsize.append(average)
        exlen.append(len(expls))

    print('')
    print('all samples:', len(lines))

    if not times:
        # nothing was explained (e.g. empty samples file); the aggregates
        # below are undefined for empty lists
        print('')
        return

    # reporting the time spent
    print('{0} total time: {1:.2f}'.format(prefix, sum(times)))
    print('{0} max time per instance: {1:.2f}'.format(prefix, max(times)))
    print('{0} min time per instance: {1:.2f}'.format(prefix, min(times)))
    print('{0} avg time per instance: {1:.2f}'.format(
        prefix, sum(times) / len(times)))

    # reporting the number of oracle calls
    print('{0} total oracle calls: {1}'.format(prefix, sum(calls)))
    print('{0} max oracle calls per instance: {1}'.format(prefix, max(calls)))
    print('{0} min oracle calls per instance: {1}'.format(prefix, min(calls)))
    print('{0} avg oracle calls per instance: {1:.2f}'.format(
        prefix, float(sum(calls)) / len(calls)))

    # reporting the number and the size of explanations
    print('{0} avg number of explanations per instance: {1:.2f}'.format(
        prefix, float(sum(exlen)) / len(exlen)))
    print('{0} avg explanation size per instance: {1:.2f}'.format(
        prefix, float(sum(xsize)) / len(xsize)))
    print('')
# Excerpt of a loop that runs two explainers (sxgb and mxgb) on the same
# instances; the original line breaks and indentation were lost, so everything
# below sits on one physical line.
# Visible steps for each instance string in `insts` (the loop stops once the
# index equals nof_insts):
#   * parse the comma-separated values into floats and store them in both
#     soptions.explain and moptions.explain;
#   * explain with sxgb and write the instance, len(expl1) and sxgb.x.time to
#     slog, followed by an empty line;
#   * append sxgb.x.used_mem / 1024.0 (rounded to 3 decimals) to smem and
#     sxgb.x.time to stimes, then flush slog and stdout;
#   * explain with mxgb and write the instance and len(expl2[0]) to mlog.
# NOTE(review): expl2 is indexed with [0] while expl1 is not -- presumably
# mxgb.explain returns a collection of explanations; confirm.
# NOTE(review): the unit of x.used_mem is not visible here; the division by
# 1024.0 only rescales it -- confirm before labelling the values.
mcalls = [] smem = [] mxmem = [] #with open("/tmp/texture.samples", 'r') as fp: # insts = [line.strip() for line in fp.readlines()] for i, inst in enumerate(insts): if i == nof_insts: break # processing the instance soptions.explain = [float(v.strip()) for v in inst.split(',')] moptions.explain = [float(v.strip()) for v in inst.split(',')] expl1 = sxgb.explain(soptions.explain) print(f'i: {inst}', file=slog) print(f's: {len(expl1)}', file=slog) print(f't: {sxgb.x.time:.3f}', file=slog) print('', file=slog) smem.append(round(sxgb.x.used_mem / 1024.0, 3)) stimes.append(sxgb.x.time) slog.flush() sys.stdout.flush() expl2 = mxgb.explain(moptions.explain) print(f'i: {inst}', file=mlog) print(f's: {len(expl2[0])}', file=mlog)
# Excerpt of the command-line driver; the original line breaks and indentation
# were lost, so everything below sits on one physical line.
# Visible steps, in order:
#   * if options.encode is set, create the booster from the model file
#     options.files[0] when none exists yet, then encode it, passing the sample
#     as test_on;
#   * if options.explain is set and no booster exists yet, load it from the
#     model file when any of options.uselime / useanchor / useshap is set and
#     from an encoding file otherwise;
#   * when options.limefeats is unset (falsy) it defaults to
#     len(data.names) - 1;
#   * xgb.explain() receives lime_call / anchor_call / shap_call only for the
#     option that is enabled (None otherwise) together with
#     nof_feats=options.limefeats;
#   * when one of the three heuristic explainers was requested and
#     options.validate is set, the explanation is passed to xgb.validate().
# NOTE(review): the exact nesting of these statements cannot be recovered from
# this flattened text -- confirm against the original file.
if options.encode: if not xgb: xgb = XGBooster(options, from_model=options.files[0]) # encode it and save the encoding to another file xgb.encode(test_on=options.explain) if options.explain: if not xgb: if options.uselime or options.useanchor or options.useshap: xgb = XGBooster(options, from_model=options.files[0]) else: # abduction-based approach requires an encoding xgb = XGBooster(options, from_encoding=options.files[0]) # checking LIME or SHAP should use all features if not options.limefeats: options.limefeats = len(data.names) - 1 # explain using anchor or the abduction-based approach expl = xgb.explain( options.explain, use_lime=lime_call if options.uselime else None, use_anchor=anchor_call if options.useanchor else None, use_shap=shap_call if options.useshap else None, nof_feats=options.limefeats) if (options.uselime or options.useanchor or options.useshap) and options.validate: xgb.validate(options.explain, expl)