def objective(params):
    """Score one set of training parameters with 4-fold cross-validation.

    For every fold an ``XGBooster`` is created from the fold's training
    matrix and validation data (with ``HybridElasticNetICFUCF`` as third
    argument), fitted with a private copy of ``params`` and evaluated on the
    fold's test data. The per-fold scores are averaged and printed as the
    "Map@10 k-fold score".

    Args:
        params: Training parameters passed to ``XGBooster.fit`` as
            ``train_parameters``. Each fold receives a deep copy, so whatever
            ``fit`` does to its argument cannot affect ``params`` itself.

    Returns:
        The negated mean score over the folds (presumably because the caller
        minimises the objective -- confirm against the optimiser in use).
    """
    print(params)

    n_folds = 4
    # Build the folds once. The split does not depend on the loop index, so
    # recreating it in every iteration only repeats work and, should the
    # split be randomised, would mix folds taken from different partitions.
    folds = Helper().get_kfold_data(n_folds)

    total_score = 0
    for k in range(n_folds):
        URM_train, URM_test, validation_data, test_data = folds[k]

        booster = XGBooster(URM_train, validation_data, HybridElasticNetICFUCF)
        booster.URM_test = URM_test

        booster.fit(train_parameters=deepcopy(params))
        score, _ = Evaluator(test_mode=True).evaluate_recommender_kfold(
            booster, test_data, sequential=True)
        total_score += score

    total_score /= n_folds

    print("Map@10 k-fold score:", total_score)
    return -total_score
Esempio n. 2
0
from options import Options
import os
import resource
import sys
from xgbooster import XGBooster


# Script entry point: load a pre-trained compas model, encode it and read the
# benchmark samples. The code that consumes these globals is not part of this
# excerpt.
if __name__ == '__main__':
    # parsing command-line options
    options = Options(sys.argv)

    # making output unbuffered
    # (only done on Python 2: stdout is reopened with a buffer size of 0)
    if sys.version_info.major == 2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # load the booster from a pickled model at a path relative to the
    # current working directory
    xgb = XGBooster(options, from_model='../temp/compas_data/compas_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl')

    # encode it and save the encoding to another file
    xgb.encode()

    # independent deep copy of the encoded booster; how it is used is not
    # visible in this excerpt
    # NOTE(review): `copy` is not among the imports visible above this block
    # -- confirm that `import copy` exists elsewhere in the file.
    xgb2 = copy.deepcopy(xgb)

    # read all sample lines into memory (one list entry per line, with the
    # line endings kept)
    with open('../bench/fairml/compas/compas.samples', 'r') as fp:
        lines = fp.readlines()

    # timers
    # (empty accumulators at this point; what each prefix stands for and where
    # they are filled is outside this excerpt)
    ltimes = []
    vtimes = []
    ftimes = []
    etimes = []
Esempio n. 3
0
    # Pre-processing mode: run preprocess_dataset on the first input file
    # together with options.preprocess_categorical_files, then terminate
    # without doing anything else.
    if (options.preprocess_categorical):
        preprocess_dataset(options.files[0],
                           options.preprocess_categorical_files)
        exit()

    # Everything below requires at least one input file.
    if options.files:
        # booster shared by the steps below; stays None unless one is created
        xgb = None

        # Training mode: the first file is read as a dataset and a booster is
        # trained from it.
        if options.train:
            data = Data(filename=options.files[0],
                        mapfile=options.mapfile,
                        separator=options.separator,
                        use_categorical=options.use_categorical)

            xgb = XGBooster(options, from_data=data)
            # train() yields three values; none of them is used again within
            # this excerpt
            train_accuracy, test_accuracy, model = xgb.train()

        # read a sample from options.explain
        # (the comma-separated string is replaced in place by a list of floats)
        if options.explain:
            options.explain = [
                float(v.strip()) for v in options.explain.split(',')
            ]

        if options.encode:
            # no booster was created above, so the first file is passed as a
            # saved model instead
            if not xgb:
                xgb = XGBooster(options, from_model=options.files[0])

            # encode it and save the encoding to another file
            # (test_on receives the parsed sample, or the unchanged falsy
            # value of options.explain when no sample was given)
            xgb.encode(test_on=options.explain)
import os
import resource
import sys
from xgbooster import XGBooster

# Script entry point: load a pre-trained adult model, encode it and read the
# benchmark samples. The code that consumes these globals is not part of this
# excerpt.
if __name__ == '__main__':
    # parsing command-line options
    options = Options(sys.argv)

    # making output unbuffered
    # (only done on Python 2: stdout is reopened with a buffer size of 0)
    if sys.version_info.major == 2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # load the booster from a pickled model at a path relative to the
    # current working directory
    xgb = XGBooster(
        options,
        from_model=
        'temp/adult_data/adult_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
    )

    # encode it and save the encoding to another file
    xgb.encode()

    # read all sample lines into memory (one list entry per line, with the
    # line endings kept)
    with open('../bench/anchor/adult/adult.samples', 'r') as fp:
        lines = fp.readlines()

    # timers
    # (empty accumulators at this point; what each prefix stands for and where
    # they are filled is outside this excerpt)
    atimes = []
    vtimes = []
    ftimes = []
    etimes = []
import os
import resource
import sys
from xgbooster import XGBooster

# Script entry point: load a pre-trained recidivism model, encode it and read
# the benchmark samples. The code that consumes these globals is not part of
# this excerpt.
if __name__ == '__main__':
    # parsing command-line options
    options = Options(sys.argv)

    # making output unbuffered
    # (only done on Python 2: stdout is reopened with a buffer size of 0)
    if sys.version_info.major == 2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # load the booster from a pickled model at a path relative to the
    # current working directory
    xgb = XGBooster(
        options,
        from_model=
        'temp/recidivism_data/recidivism_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
    )

    # encode it and save the encoding to another file
    xgb.encode()

    # independent deep copy of the encoded booster; how it is used is not
    # visible in this excerpt
    # NOTE(review): `copy` is not among the imports visible above this block
    # -- confirm that `import copy` exists elsewhere in the file.
    xgb2 = copy.deepcopy(xgb)

    # read all sample lines into memory (one list entry per line, with the
    # line endings kept)
    with open('../bench/anchor/recidivism/recidivism.samples', 'r') as fp:
        lines = fp.readlines()

    # timers
    # (empty accumulators at this point; what each prefix stands for and where
    # they are filled is outside this excerpt)
    stimes = []
    vtimes = []
    ftimes = []
Esempio n. 6
0
def _cpu_user_time():
    """Return the user CPU time (seconds) of this process plus its children.

    The children part is whatever ``resource.RUSAGE_CHILDREN`` reports.
    """
    return resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
            resource.getrusage(resource.RUSAGE_SELF).ru_utime


def enumerate_all(options, xtype, xnum, smallest, usecld, usemhs, useumcs,
                  prefix):
    """Explain every distinct spam10 sample and print per-run statistics.

    The samples are read from '../bench/nlp/spam/quant/spam10.samples', one
    comma-separated list of numbers per line. Each distinct sample is passed
    to ``XGBooster.explain`` and the user CPU time, the number of oracle calls
    (``xgb.x.calls``) and the number and sizes of the returned explanations
    are printed; a summary over all explained samples follows at the end.

    Args:
        options: Options object shared with ``XGBooster``. It is modified in
            place: ``xtype``, ``xnum``, ``smallest``, ``usecld``, ``usemhs``
            and ``useumcs`` are overwritten with the arguments of the same
            name, ``reduce`` is set to ``'lin'`` and ``explain`` is set to
            each sample in turn.
        xtype, xnum, smallest, usecld, usemhs, useumcs: Values stored on
            ``options``; their meaning is defined by ``XGBooster`` and is not
            visible here.
        prefix: Label printed at the start of every report line.

    Returns:
        None. All results are written to standard output.
    """
    # setting the right preferences
    options.xtype = xtype
    options.xnum = xnum
    options.reduce = 'lin'
    options.smallest = smallest
    options.usecld = usecld
    options.usemhs = usemhs
    options.useumcs = useumcs

    # reading all samples (duplicates are skipped in the loop below)
    with open('../bench/nlp/spam/quant/spam10.samples', 'r') as fp:
        lines = fp.readlines()

    # timers and other variables
    times, calls = [], []
    xsize, exlen = [], []

    # doing everything incrementally is expensive;
    # let's restart the solver for every 10% of instances.
    # Floor division keeps the step an integer on Python 3 (true division
    # yields a float step that only lines up with i when the number of lines
    # is a multiple of 10); the lower bound of 1 avoids a modulo by zero on
    # Python 2 when there are fewer than 10 lines.
    restart_every = max(1, len(lines) // 10)

    tested = set()
    for i, s in enumerate(lines):
        if i % restart_every == 0:
            # creating a new XGBooster
            xgb = XGBooster(
                options,
                from_model=
                '../temp/spam10_data/spam10_data_nbestim_50_maxdepth_3_testsplit_0.2.mod.pkl'
            )

            # encode it and save the encoding to another file
            xgb.encode()

        options.explain = [float(v.strip()) for v in s.split(',')]

        # every distinct sample is explained only once
        sample_key = tuple(options.explain)
        if sample_key in tested:
            continue

        tested.add(sample_key)
        # the raw line is echoed unchanged (including its line ending)
        print(prefix, 'sample {0}: {1}'.format(i, s))

        # computing the explanations; only user CPU time is measured
        started = _cpu_user_time()

        expls = xgb.explain(options.explain)

        timer = _cpu_user_time() - started
        times.append(timer)

        # explanation sizes are computed once and reused for all statistics;
        # float() keeps the average a true division on Python 2 as well
        sizes = [len(x) for x in expls]
        avg_size = sum(sizes) / float(len(expls))

        print(prefix, 'expls:', expls)
        print(prefix, 'nof x:', len(expls))
        print(prefix, 'timex: {0:.2f}'.format(timer))
        print(prefix, 'calls:', xgb.x.calls)
        print(prefix, 'Msz x:', max(sizes))
        print(prefix, 'msz x:', min(sizes))
        print(prefix, 'asz x: {0:.2f}'.format(avg_size))
        print('')

        calls.append(xgb.x.calls)
        xsize.append(avg_size)
        exlen.append(len(expls))

    print('')
    print('all samples:', len(lines))

    if not times:
        # nothing was explained (empty samples file): the max/min/average
        # figures below are undefined and would raise on empty lists
        print('')
        return

    # reporting the time spent
    print('{0} total time: {1:.2f}'.format(prefix, sum(times)))
    print('{0} max time per instance: {1:.2f}'.format(prefix, max(times)))
    print('{0} min time per instance: {1:.2f}'.format(prefix, min(times)))
    print('{0} avg time per instance: {1:.2f}'.format(prefix,
                                                      sum(times) / len(times)))
    print('{0} total oracle calls: {1}'.format(prefix, sum(calls)))
    print('{0} max oracle calls per instance: {1}'.format(prefix, max(calls)))
    print('{0} min oracle calls per instance: {1}'.format(prefix, min(calls)))
    print('{0} avg oracle calls per instance: {1:.2f}'.format(
        prefix,
        float(sum(calls)) / len(calls)))
    print('{0} avg number of explanations per instance: {1:.2f}'.format(
        prefix,
        float(sum(exlen)) / len(exlen)))
    print('{0} avg explanation size per instance: {1:.2f}'.format(
        prefix,
        float(sum(xsize)) / len(xsize)))
    print('')
Esempio n. 7
0
            # count > 1 is taken as an absolute number of instances;
            # otherwise (including count == 1) it is taken as a fraction of
            # all instances. Either way the result is capped at len(insts).
            if count > 1:
                nof_insts = min(int(count), len(insts))
            else:
                nof_insts = min(int(len(insts) * count), len(insts))
            print(f'considering {nof_insts} instances')

        # dataset name = file name of `data` without directory and extension;
        # it selects both the model file and the log file names
        base = os.path.splitext(os.path.basename(data))[0]
        mfile = 'temp/{0}/{0}_nbestim_{1}_maxdepth_{2}_testsplit_0.2.mod.pkl'.format(
            base, num, adepth)

        # one log file per booster, overwritten on every run
        # NOTE(review): both files are opened without a `with` block and no
        # close() is visible in this excerpt -- confirm they are closed later.
        slog = open(f'results/smt/{base}.log', 'w')
        mlog = open(f'results/mx/{base}.log', 'w')

        # creating booster objects
        # (both load the same model file but use different option sets)
        sxgb = XGBooster(soptions, from_model=mfile)
        sxgb.encode(test_on=None)
        mxgb = XGBooster(moptions, from_model=mfile)
        mxgb.encode(test_on=None)

        # empty accumulators at this point; they are filled outside this
        # excerpt (presumably s* belongs to sxgb and m* to mxgb -- confirm)
        stimes = []
        mtimes = []
        mcalls = []
        smem = []
        mxmem = []

        #with open("/tmp/texture.samples", 'r') as fp:
        #    insts = [line.strip() for line in fp.readlines()]

        for i, inst in enumerate(insts):
            if i == nof_insts:
Esempio n. 8
0
    # Pre-processing mode: run preprocess_dataset on the first input file
    # together with options.preprocess_categorical_files, then terminate
    # without doing anything else.
    if (options.preprocess_categorical):
        preprocess_dataset(options.files[0],
                           options.preprocess_categorical_files)
        exit()

    # Everything below requires at least one input file.
    if options.files:
        # booster shared by the steps below; stays None unless one is created
        xgb = None

        # Training mode: the first file is read as a dataset and a booster is
        # trained from it.
        if options.train:
            data = Data(filename=options.files[0],
                        mapfile=options.mapfile,
                        separator=options.separator,
                        use_categorical=options.use_categorical)

            xgb = XGBooster(options, from_data=data)
            # train() yields three values; none of them is used again within
            # this excerpt
            train_accuracy, test_accuracy, model = xgb.train()

        # read a sample from options.explain
        # (the comma-separated string is replaced in place by a list of floats)
        if options.explain:
            options.explain = [
                float(v.strip()) for v in options.explain.split(',')
            ]

        if options.encode:
            # no booster was created above, so the first file is passed as a
            # saved model instead
            if not xgb:
                xgb = XGBooster(options, from_model=options.files[0])

            # encode it and save the encoding to another file
            # (test_on receives the parsed sample, or the unchanged falsy
            # value of options.explain when no sample was given)
            xgb.encode(test_on=options.explain)