Example #1
def run_test(dataset, names, sampling_rate=2., n_test=10, alpha=1):
    # For each model, fit a rule-list surrogate n_test times and record
    # fidelity, accuracy, self-fidelity, rule-list length, and wall-clock time.
    results = []
    rule_maxlen = 3
    _lambda = 10 if dataset in {'iris', 'breast_cancer', 'wine', 'pima'} else 50
    for name in names:
        model_file = get_path('models', name + '.mdl')
        tmp_file = get_path('experiments', name + '.json')
        # Reuse cached results if this experiment has already been run.
        if file_exists(tmp_file):
            obj = json2dict(tmp_file)
            results.append(obj)
            continue
        fidelities = []
        self_fidelities = []
        accs = []
        seconds = []
        list_lengths = []
        for i in range(n_test):
            print('test', i)
            start = time.time()
            fidelity, acc, self_fidelity, n_rules = train_surrogate(model_file, surrogate='rule',
                                                                    sampling_rate=sampling_rate, iters=100000,
                                                                    rule_maxlen=rule_maxlen, alpha=alpha,
                                                                    min_support=0.05, _lambda=_lambda)
            seconds.append(time.time() - start)
            print('time: {}s; length: {}'.format(seconds[-1], n_rules))
            list_lengths.append(n_rules)
            self_fidelities.append(self_fidelity)
            fidelities.append(fidelity)
            accs.append(acc)

        std_acc = float(np.std(accs))
        mean_acc = float(np.mean(accs))
        max_acc = float(np.max(accs))
        min_acc = float(np.min(accs))
        std_fidelity = float(np.std(fidelities))
        mean_fidelity = float(np.mean(fidelities))
        max_fidelity = float(np.max(fidelities))
        min_fidelity = float(np.min(fidelities))
        std_self_fidelity = float(np.std(self_fidelities))
        mean_self_fidelity = float(np.mean(self_fidelities))
        max_self_fidelity = float(np.max(self_fidelities))
        min_self_fidelity = float(np.min(self_fidelities))
        mean_time = float(np.mean(seconds))
        mean_length = float(np.mean(list_lengths))
        obj = {'fidelity': fidelities, 'acc': accs,
               'std_acc': std_acc, 'mean_acc': mean_acc, 'min_acc': min_acc, 'max_acc': max_acc,
               'std_fidelity': std_fidelity, 'mean_fidelity': mean_fidelity,
               'min_fidelity': min_fidelity, 'max_fidelity': max_fidelity,
               'std_self_fidelity': std_self_fidelity, 'mean_self_fidelity': mean_self_fidelity,
               'min_self_fidelity': min_self_fidelity, 'max_self_fidelity': max_self_fidelity,
               'time': seconds, 'mean_time': mean_time, 'lengths': list_lengths, 'mean_length': mean_length}
        print(dataset)
        print(name)
        print(obj)
        print('---------')
        dict2json(obj, tmp_file)
        results.append(obj)
    return results
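These snippets assume `import time`, `import numpy as np`, and a few small I/O helpers (`get_path`, `file_exists`, `json2dict`, `dict2json`) defined elsewhere in the repository, along with the training routines (`train_surrogate`, `train_nn`, `train_svm`, ...). The helpers are not shown here; the following is a minimal sketch of what they could look like, inferred only from how the examples use them, and the repository's actual implementations may differ.

# Minimal sketch of the assumed I/O helpers, inferred from usage above.
# These are assumptions, not the repository's actual code.
import json
import os

ROOT = os.path.dirname(os.path.abspath(__file__))  # assumed project root


def get_path(*parts):
    # Build an absolute path under the project root, e.g. get_path('models', 'x.mdl').
    return os.path.join(ROOT, *parts)


def file_exists(path):
    return os.path.isfile(path)


def json2dict(path):
    # Load a JSON file back into a Python object.
    with open(path) as f:
        return json.load(f)


def dict2json(obj, path):
    # Serialize a dict (or list) to a JSON file.
    with open(path, 'w') as f:
        json.dump(obj, f, indent=2)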
Example #2
def main():
    # train_nn(dataset='abalone2', neurons=(40, 40), tol=1e-6, alpha=0.0001)
    accs = []
    losses = []
    aucs = []
    for i in range(10):
        # acc, loss, auc=train_nn(dataset='pima', neurons=(20, 20), tol=1e-5, alpha=1.0, sample=False, name='original')
        acc, loss, auc = train_nn(dataset='pima', neurons=(20, 20), tol=1e-5, alpha=1.0, sample=True, name='sample')
        # acc, loss, auc = train_svm(dataset='pima', C=0.1)
        accs.append(acc)
        losses.append(loss)
        aucs.append(auc)

    print(np.mean(accs))
    print(np.std(accs))
    print(np.min(accs))
    print(np.max(accs))
    dict2json({'loss': losses, 'acc': accs, 'auc': aucs}, 'case-pima-nn-2-sample2.json')
Example #3
def test_sampling_rate(dataset='abalone3'):
    n_test = 10
    # max_rulelens = [2, 2, 2, 3, 3, 3]
    neurons = (50, 50, 50, 50)
    sampling_rates = [0.25, 0.5, 1, 2, 4, 8]
    performance_dict = {}
    model_name = '-'.join([dataset, 'nn'] + [str(i) for i in neurons])
    for sampling_rate in sampling_rates:
        # Results for each sampling rate are cached in their own file.
        file_name = get_path('experiments', '-'.join(['sample', dataset, str(sampling_rate)]) + '.json')
        if file_exists(file_name):
            results = json2dict(file_name)
        else:
            results = run_test(dataset, [model_name], sampling_rate, n_test=n_test)
            dict2json(results, file_name)
        # Key by sampling rate so earlier runs are not overwritten.
        performance_dict[str(sampling_rate)] = results

    dict2json(performance_dict, '-'.join(['sample', dataset]) + '.json')
    return
Example #4
def main():
    # train_nn(dataset='abalone2', neurons=(40, 40), tol=1e-6, alpha=0.0001)
    accs = []
    losses = []
    aucs = []
    for i in range(10):
        acc, loss, auc = train_nn(dataset='abalone2',
                                  neurons=(40, ),
                                  tol=1e-6,
                                  alpha=0.001)
        accs.append(acc)
        losses.append(loss)
        aucs.append(auc)

    print(np.mean(accs))
    print(np.std(accs))
    print(np.mean(aucs))
    print(np.std(aucs))
    dict2json({
        'loss': losses,
        'acc': accs,
        'auc': aucs
    }, 'case1-abalone-no-sample.json')
Example #5
def test(target='svm'):
    n_test = 10
    # max_rulelens = [2, 2, 2, 3, 3, 3]
    performance_dict = {}
    if target == 'nn':
        # Explain each trained neural network with a rule-list surrogate.
        nns = train_all_nn()
        for i, nn_names in enumerate(nns):
            dataset = datasets[i]
            sampling_rate = 2 if dataset in {'adult'} else 4.
            # max_rulelen = max_rulelens[i]
            # performance_dict
            file_name = get_path('experiments', dataset + '-nn.json')
            if file_exists(file_name):
                results = json2dict(file_name)
            else:
                results = run_test(dataset, nn_names, sampling_rate=sampling_rate, n_test=n_test, alpha=0)
                dict2json(results, file_name)
            performance_dict[dataset] = results
 
        dict2json(performance_dict, 'results-nn.json')
        return
    performance_dict = {}
    svms = train_all_svm()
    print(svms)
    for i, svm_name in enumerate(svms):
        dataset = datasets[i]
        # max_rulelen = max_rulelens[i]
        # performance_dict
        sampling_rate = 2 if dataset in {'adult'} else 4.
        file_name = get_path('experiments', dataset + '-svm.json')
        if file_exists(file_name):
            results = json2dict(file_name)
        else:
            results = run_test(dataset, [svm_name], sampling_rate=sampling_rate, n_test=n_test, alpha=0)
            dict2json(results, file_name)
        performance_dict[dataset] = results

    dict2json(performance_dict, 'results-svm.json')
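For context, here is a hedged sketch of a driver that could tie these entry points together; the repository's actual main script may look different.

# Hypothetical driver script -- shown only to illustrate how the functions
# above fit together; the repository may wire them up differently.
if __name__ == '__main__':
    test(target='svm')              # rule surrogates for the SVM models
    test(target='nn')               # rule surrogates for the neural networks
    test_sampling_rate('abalone3')  # sensitivity of the surrogate to sampling_rate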