Beispiel #1
0
        if tuple(options.explain) in tested:
            continue

        tested.add(tuple(options.explain))
        print('sample {0}: {1}'.format(i, ','.join(s.split(','))))

        # first, compute a valid explanation to guarantee
        # that an explanation of this size exists
        expl = xgb2.explain(options.explain)

        # calling lime
        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
                resource.getrusage(resource.RUSAGE_SELF).ru_utime

        expl = xgb.explain(options.explain, use_lime=lime_call, nof_feats=len(expl))

        print('expl1:', expl)
        print('szex1:', len(expl))

        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
                resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
        ltimes.append(timer)

        # validating explanation of lime
        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
                resource.getrusage(resource.RUSAGE_SELF).ru_utime

        coex = xgb.validate(options.explain, expl)

        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
    errors = []
    reduced = 0
    for i, s in enumerate(lines):
        options.explain = [float(v.strip()) for v in s.split(',')]

        if tuple(options.explain) in tested:
            continue

        tested.add(tuple(options.explain))
        print('sample {0}: {1}'.format(i, ','.join(s.split(','))))

        # calling anchor
        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
                resource.getrusage(resource.RUSAGE_SELF).ru_utime

        expl = xgb.explain(options.explain, use_anchor=anchor_call)

        print('expl1:', expl)
        print('szex1:', len(expl))

        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
                resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
        atimes.append(timer)

        # validating explanation of anchor
        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
                resource.getrusage(resource.RUSAGE_SELF).ru_utime

        coex = xgb.validate(options.explain, expl)

        timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
Beispiel #3
0
            xgb = XGBooster(options, from_data=data)
            train_accuracy, test_accuracy, model = xgb.train()

        # read a sample from options.explain
        if options.explain:
            options.explain = [
                float(v.strip()) for v in options.explain.split(',')
            ]

        if options.encode:
            if not xgb:
                xgb = XGBooster(options, from_model=options.files[0])

            # encode it and save the encoding to another file
            xgb.encode(test_on=options.explain)

        if options.explain:
            if not xgb:
                # abduction-based approach requires an encoding
                xgb = XGBooster(options, from_encoding=options.files[0])
            if (options.encode == "ortools"):
                expl = xgb.explain_ortools(options.explain)
            else:
                # explain using anchor or the abduction-based approach
                expl = xgb.explain(options.explain)

            # here we take only the first explanation in case enumeration was done
            if options.xnum != 1:
                expl = expl[0]
Beispiel #4
0
def _cpu_user_time():
    """Return the user CPU time reported by getrusage for self plus children."""
    return resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
        resource.getrusage(resource.RUSAGE_SELF).ru_utime


def enumerate_all(options, xtype, xnum, smallest, usecld, usemhs, useumcs,
                  prefix,
                  samples_path='../bench/nlp/spam/quant/spam10.samples',
                  model_path='../temp/spam10_data/spam10_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'):
    """Enumerate explanations for every unique sample and print statistics.

    The given preferences are written into ``options`` (``reduce`` is always
    set to ``'lin'``), each distinct sample read from ``samples_path`` is
    passed to ``XGBooster.explain``, and per-sample as well as aggregate
    timing / call-count / explanation-size figures are printed.

    Args:
        options: option object; its ``xtype``, ``xnum``, ``reduce``,
            ``smallest``, ``usecld``, ``usemhs``, ``useumcs`` and ``explain``
            attributes are overwritten by this function.
        xtype, xnum, smallest, usecld, usemhs, useumcs: values stored into
            the same-named attributes of ``options``.
        prefix: label printed in front of every report line.
        samples_path: file with one comma-separated sample per line.
        model_path: model file handed to ``XGBooster(from_model=...)``.

    Returns:
        None. All results are printed to standard output.
    """
    # setting the right preferences
    options.xtype = xtype
    options.xnum = xnum
    options.reduce = 'lin'
    options.smallest = smallest
    options.usecld = usecld
    options.usemhs = usemhs
    options.useumcs = useumcs

    # reading all samples (duplicates are filtered out below)
    with open(samples_path, 'r') as fp:
        lines = fp.readlines()

    # timers and other variables
    times, calls = [], []
    xsize, exlen = [], []

    # doing everything incrementally is expensive;
    # let's restart the solver for every 10% of instances.
    # Floor division keeps the step an integer (true division would make the
    # modulo test below a float comparison); max() avoids a zero step when
    # there are fewer than 10 lines.
    restart_every = max(1, len(lines) // 10)

    tested = set()
    for i, s in enumerate(lines):
        if i % restart_every == 0:
            # creating a new XGBooster
            xgb = XGBooster(options, from_model=model_path)

            # encode it and save the encoding to another file
            xgb.encode()

        # a blank line (e.g. at the end of the file) carries no sample
        if not s.strip():
            continue

        options.explain = [float(v.strip()) for v in s.split(',')]

        sample_key = tuple(options.explain)
        if sample_key in tested:
            continue

        tested.add(sample_key)
        print(prefix, 'sample {0}: {1}'.format(i, s))

        # enumerating explanations and measuring the CPU time it takes
        started = _cpu_user_time()

        expls = xgb.explain(options.explain)

        timer = _cpu_user_time() - started
        times.append(timer)

        sizes = [len(x) for x in expls]
        # float() keeps the average exact regardless of division semantics
        avg_size = sum(sizes) / float(len(expls))

        print(prefix, 'expls:', expls)
        print(prefix, 'nof x:', len(expls))
        print(prefix, 'timex: {0:.2f}'.format(timer))
        print(prefix, 'calls:', xgb.x.calls)
        print(prefix, 'Msz x:', max(sizes))
        print(prefix, 'msz x:', min(sizes))
        print(prefix, 'asz x: {0:.2f}'.format(avg_size))
        print('')

        calls.append(xgb.x.calls)
        xsize.append(avg_size)
        exlen.append(len(expls))

    print('')
    print('all samples:', len(lines))

    if not times:
        # nothing was explained, so max/min/avg below would be undefined
        print('{0} no samples were processed'.format(prefix))
        print('')
        return

    # reporting the time spent
    print('{0} total time: {1:.2f}'.format(prefix, sum(times)))
    print('{0} max time per instance: {1:.2f}'.format(prefix, max(times)))
    print('{0} min time per instance: {1:.2f}'.format(prefix, min(times)))
    print('{0} avg time per instance: {1:.2f}'.format(prefix,
                                                      sum(times) / len(times)))
    print('{0} total oracle calls: {1}'.format(prefix, sum(calls)))
    print('{0} max oracle calls per instance: {1}'.format(prefix, max(calls)))
    print('{0} min oracle calls per instance: {1}'.format(prefix, min(calls)))
    print('{0} avg oracle calls per instance: {1:.2f}'.format(
        prefix,
        float(sum(calls)) / len(calls)))
    print('{0} avg number of explanations per instance: {1:.2f}'.format(
        prefix,
        float(sum(exlen)) / len(exlen)))
    print('{0} avg explanation size per instance: {1:.2f}'.format(
        prefix,
        float(sum(xsize)) / len(xsize)))
    print('')
Beispiel #5
0
        mcalls = []
        smem = []
        mxmem = []

        #with open("/tmp/texture.samples", 'r') as fp:
        #    insts = [line.strip() for line in fp.readlines()]

        for i, inst in enumerate(insts):
            if i == nof_insts:
                break

            # processing the instance
            soptions.explain = [float(v.strip()) for v in inst.split(',')]
            moptions.explain = [float(v.strip()) for v in inst.split(',')]

            expl1 = sxgb.explain(soptions.explain)

            print(f'i: {inst}', file=slog)
            print(f's: {len(expl1)}', file=slog)
            print(f't: {sxgb.x.time:.3f}', file=slog)
            print('', file=slog)

            smem.append(round(sxgb.x.used_mem / 1024.0, 3))
            stimes.append(sxgb.x.time)

            slog.flush()
            sys.stdout.flush()

            expl2 = mxgb.explain(moptions.explain)
            print(f'i: {inst}', file=mlog)
            print(f's: {len(expl2[0])}', file=mlog)
Beispiel #6
0
        if options.encode:
            if not xgb:
                xgb = XGBooster(options, from_model=options.files[0])

            # encode it and save the encoding to another file
            xgb.encode(test_on=options.explain)

        if options.explain:
            if not xgb:
                if options.uselime or options.useanchor or options.useshap:
                    xgb = XGBooster(options, from_model=options.files[0])
                else:
                    # abduction-based approach requires an encoding
                    xgb = XGBooster(options, from_encoding=options.files[0])

            # checking LIME or SHAP should use all features
            if not options.limefeats:
                options.limefeats = len(data.names) - 1

            # explain using anchor or the abduction-based approach
            expl = xgb.explain(
                options.explain,
                use_lime=lime_call if options.uselime else None,
                use_anchor=anchor_call if options.useanchor else None,
                use_shap=shap_call if options.useshap else None,
                nof_feats=options.limefeats)

            if (options.uselime or options.useanchor
                    or options.useshap) and options.validate:
                xgb.validate(options.explain, expl)