def run_test(dataset, names, sampling_rate=2., n_test=10, alpha=1):
    """Repeatedly fit a rule surrogate for each named model and summarise the runs.

    For every entry of ``names`` the function calls ``train_surrogate``
    ``n_test`` times, timing each call, and gathers the returned fidelity,
    accuracy, self-fidelity and rule count.  The raw per-run values plus their
    std/mean/min/max (mean only for time and length) are stored in one dict,
    which is printed, written with ``dict2json`` and appended to the result.

    NOTE: if the file at ``get_path('experiments', name + '.json')`` already
    exists it is loaded and used as-is.  That lookup depends on the model name
    only, so a cached entry is reused even when ``sampling_rate``, ``n_test``
    or ``alpha`` differ from the run that produced it.

    Args:
        dataset: Dataset identifier.  Selects ``_lambda`` (10 for 'iris',
            'breast_cancer', 'wine' and 'pima', otherwise 50) and is printed
            next to each summary.
        names: Iterable of model names; ``name + '.mdl'`` under 'models' is
            the file handed to ``train_surrogate``.
        sampling_rate: Forwarded unchanged to ``train_surrogate``.
        n_test: Number of surrogate trainings per model.
        alpha: Forwarded unchanged to ``train_surrogate``.

    Returns:
        A list with one summary dict per entry of ``names``, in order.
    """

    def _spread(values):
        # Same evaluation order as the summary needs: std, mean, max, min.
        deviation = float(np.std(values))
        average = float(np.mean(values))
        highest = float(np.max(values))
        lowest = float(np.min(values))
        return deviation, average, highest, lowest

    if dataset in {'iris', 'breast_cancer', 'wine', 'pima'}:
        _lambda = 10
    else:
        _lambda = 50
    rule_maxlen = 3

    results = []
    for name in names:
        model_path = get_path('models', name + '.mdl')
        cache_path = get_path('experiments', name + '.json')

        # A previously written summary short-circuits the whole experiment.
        if file_exists(cache_path):
            results.append(json2dict(cache_path))
            continue

        run_fidelity = []
        run_self_fidelity = []
        run_acc = []
        run_seconds = []
        run_lengths = []
        for run_idx in range(n_test):
            print('test', run_idx)
            started = time.time()
            fidelity, acc, self_fidelity, n_rules = train_surrogate(
                model_path,
                surrogate='rule',
                sampling_rate=sampling_rate,
                iters=100000,
                rule_maxlen=rule_maxlen,
                alpha=alpha,
                min_support=0.05,
                _lambda=_lambda,
            )
            elapsed = time.time() - started
            run_seconds.append(elapsed)
            print('time: {}s; length: {}'.format(elapsed, n_rules))
            run_lengths.append(n_rules)
            run_self_fidelity.append(self_fidelity)
            run_fidelity.append(fidelity)
            run_acc.append(acc)

        acc_std, acc_mean, acc_max, acc_min = _spread(run_acc)
        fid_std, fid_mean, fid_max, fid_min = _spread(run_fidelity)
        self_std, self_mean, self_max, self_min = _spread(run_self_fidelity)
        time_mean = float(np.mean(run_seconds))
        length_mean = float(np.mean(run_lengths))

        # Key order is kept stable because the dict is printed and serialised.
        summary = {
            'fidelity': run_fidelity,
            'acc': run_acc,
            'std_acc': acc_std,
            'mean_acc': acc_mean,
            'min_acc': acc_min,
            'max_acc': acc_max,
            'std_fidelity': fid_std,
            'mean_fidelity': fid_mean,
            'min_fidelity': fid_min,
            'max_fidelity': fid_max,
            'std_self_fidelity': self_std,
            'mean_self_fidelity': self_mean,
            'min_self_fidelity': self_min,
            'max_self_fidelity': self_max,
            'time': run_seconds,
            'mean_time': time_mean,
            'lengths': run_lengths,
            'mean_length': length_mean,
        }

        for line in (dataset, name, summary, '---------'):
            print(line)
        dict2json(summary, cache_path)
        results.append(summary)

    return results
def main():
    """Train the 'pima' neural network ten times and store the metrics.

    Each run calls ``train_nn`` with ``sample=True`` and ``name='sample'``
    and records the returned accuracy, loss and AUC.  The mean, standard
    deviation, minimum and maximum accuracy are printed, and all per-run
    values are written to 'case-pima-nn-2-sample2.json'.

    Earlier variants of this experiment used ``sample=False`` with
    ``name='original'``, or ``train_svm(dataset='pima', C=0.1)``.

    NOTE: this file defines ``main`` again further down; once the module is
    loaded the later definition is the one bound to the name.
    """
    metrics = {'loss': [], 'acc': [], 'auc': []}

    for _ in range(10):
        acc, loss, auc = train_nn(
            dataset='pima',
            neurons=(20, 20),
            tol=1e-5,
            alpha=1.0,
            sample=True,
            name='sample',
        )
        metrics['acc'].append(acc)
        metrics['loss'].append(loss)
        metrics['auc'].append(auc)

    # Accuracy summary, one value per line.
    for stat in (np.mean, np.std, np.min, np.max):
        print(stat(metrics['acc']))

    dict2json(metrics, 'case-pima-nn-2-sample2.json')
def test_sampling_rate(dataset='abalone3'):
    """Run the surrogate experiment at several sampling rates for one dataset.

    For each rate in 0.25, 0.5, 1, 2, 4, 8 the results are loaded from a
    per-rate JSON file if it exists; otherwise ``run_test`` is executed
    (10 repetitions) on the model named ``<dataset>-nn-50-50-50-50`` and its
    output is written to that file.  All per-rate results are then saved
    together in a single JSON file.

    The combined dict is keyed by ``str(sampling_rate)``.  It was previously
    keyed by ``dataset``, which is the same for every iteration, so each rate
    overwrote the one before and only the last rate was saved.

    NOTE(review): ``run_test`` as defined in this file reuses a per-model
    cache file whose name does not include the sampling rate; confirm that
    this cache is cleared between rates, otherwise later rates will reuse
    the first rate's results.

    Args:
        dataset: Dataset identifier used to build the model name and the
            output file names.

    Returns:
        None.
    """
    n_test = 10
    neurons = (50, 50, 50, 50)
    sampling_rates = [0.25, 0.5, 1, 2, 4, 8]
    model_name = '-'.join([dataset, 'nn'] + [str(n) for n in neurons])

    performance_dict = {}
    for sampling_rate in sampling_rates:
        # The join puts a '-' before '.json' ('sample-<dataset>-<rate>-.json');
        # kept as-is so result files written by earlier runs are still found.
        file_name = get_path(
            'experiments',
            '-'.join(['sample', dataset, str(sampling_rate), '.json']))
        if file_exists(file_name):
            results = json2dict(file_name)
        else:
            results = run_test(dataset, [model_name], sampling_rate, n_test=n_test)
            dict2json(results, file_name)
        # One entry per sampling rate; string keys because JSON object keys
        # are strings.
        performance_dict[str(sampling_rate)] = results

    dict2json(performance_dict, '-'.join(['sample', dataset, '.json']))
def main():
    """Train the 'abalone2' neural network ten times and store the metrics.

    Each run calls ``train_nn`` with a single hidden layer of 40 neurons,
    ``tol=1e-6`` and ``alpha=0.001`` and records the returned accuracy, loss
    and AUC.  The mean and standard deviation of the accuracy, then of the
    AUC, are printed, and all per-run values are written to
    'case1-abalone-no-sample.json'.

    NOTE: an earlier ``main`` appears above in this file; this later
    definition is the one that stays bound to the name.
    """
    metrics = {'loss': [], 'acc': [], 'auc': []}

    for _ in range(10):
        acc, loss, auc = train_nn(
            dataset='abalone2',
            neurons=(40, ),
            tol=1e-6,
            alpha=0.001,
        )
        metrics['acc'].append(acc)
        metrics['loss'].append(loss)
        metrics['auc'].append(auc)

    # Mean then std, first for accuracy and then for AUC.
    for key in ('acc', 'auc'):
        print(np.mean(metrics[key]))
        print(np.std(metrics[key]))

    dict2json(metrics, 'case1-abalone-no-sample.json')
def test(target='svm'):
    """Run the surrogate experiment for every dataset's NN or SVM models.

    With ``target == 'nn'`` the models come from ``train_all_nn()`` and each
    entry is passed to ``run_test`` as the list of model names.  Any other
    value uses ``train_all_svm()``, prints what it returned, and wraps each
    entry in a one-element list.  The i-th entry is paired with
    ``datasets[i]``.

    Per-dataset results are loaded from the dataset's JSON file under
    'experiments' when it exists; otherwise they are computed with
    ``run_test`` (10 repetitions, ``alpha=0``, sampling rate 2 for 'adult'
    and 4.0 for everything else) and written there.  The combined mapping of
    dataset to results goes to 'results-nn.json' or 'results-svm.json'.

    Args:
        target: 'nn' selects the neural-network models; anything else selects
            the SVM models.

    Returns:
        None.
    """
    n_test = 10
    is_nn = target == 'nn'

    if is_nn:
        trained = train_all_nn()
        cache_suffix, summary_file = '-nn.json', 'results-nn.json'
    else:
        trained = train_all_svm()
        print(trained)
        cache_suffix, summary_file = '-svm.json', 'results-svm.json'

    performance_dict = {}
    for idx, entry in enumerate(trained):
        dataset = datasets[idx]
        sampling_rate = 2 if dataset in {'adult'} else 4.
        # NN entries are already passed as the names argument; an SVM entry
        # is a single name.
        model_names = entry if is_nn else [entry]

        file_name = get_path('experiments', dataset + cache_suffix)
        if not file_exists(file_name):
            results = run_test(dataset, model_names,
                               sampling_rate=sampling_rate,
                               n_test=n_test, alpha=0)
            dict2json(results, file_name)
        else:
            results = json2dict(file_name)
        performance_dict[dataset] = results

    dict2json(performance_dict, summary_file)