import os

from sklearn.ensemble import GradientBoostingClassifier

# Generator, evaluate_classifier, evaluate_pivotal, DIRECTORY and SEED are
# expected to be provided by the surrounding module.


def run(i_cv):
    N_SIG = 15000
    N_BKG = N_SIG
    N_SAMPLES = N_SIG + N_BKG
    mix = N_SIG / N_SAMPLES
    model_name = 'GradientBoosting'
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)
    print(f'running iter {i_cv}...')
    results = {'i_cv': i_cv}
    seed = SEED + 5 * i_cv
    train_seed = seed
    test_seed = seed + 1

    # Generate training data
    generator = Generator(train_seed)
    z_train = generator.sample_nuisance(N_SIG)
    X_train, y_train = generator.sample_event(z_train, mix, N_SAMPLES)

    # Train classifier
    model = GradientBoostingClassifier(n_estimators=400, learning_rate=5e-2)
    model.fit(X_train, y_train)

    # Generate testing data
    generator = Generator(test_seed)
    z_test = generator.sample_nuisance(N_SIG)
    X_test, y_test = generator.sample_event(z_test, mix, N_SAMPLES)

    # Evaluation
    r = evaluate_classifier(model, X_train, y_train, prefix='train',
                            model_name=model_name, directory=directory)
    results.update(r)
    r = evaluate_classifier(model, X_test, y_test, prefix='test',
                            model_name=model_name, directory=directory)
    results.update(r)
    evaluate_pivotal(model, generator, prefix='test',
                     model_name=model_name, directory=directory)
    return results
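# A minimal driver sketch for running the cross-validation folds above and
# collecting the per-fold result dicts. N_ITER and the pandas aggregation are
# illustrative assumptions, not part of the original module.
def main():
    import pandas as pd

    results = pd.DataFrame([run(i_cv) for i_cv in range(N_ITER)])
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'), index=False)
    print(results.describe())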
import itertools

# get_train_data, get_classifier and the evaluation module `e` are expected to
# be provided by the surrounding module.


def evaluate_parameters():
    X, y = get_train_data(limit=25)
    scores = []
    scores_std = []
    print('Start learning...')
    forests = [70]
    rbm_components = [1100]
    rbm_learning_rate = [0.06]
    rbm_n_iter = [20]
    it = itertools.product(forests, rbm_components, rbm_learning_rate, rbm_n_iter)
    for (trees, components, learning_rate, n_iter) in it:
        classifier = get_classifier(trees, components, learning_rate, n_iter)
        name = "plots_pipeline/pipeline_{}.png".format(trees)
        e.evaluate_classifier(classifier, X, y, name=name)
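# The parameter names above (trees, components, learning_rate, n_iter) suggest
# an RBM-features + random-forest pipeline. A plausible sketch of what
# get_classifier might build, assuming scikit-learn's BernoulliRBM as the
# feature extractor:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline


def get_classifier(trees, components, learning_rate, n_iter):
    rbm = BernoulliRBM(n_components=components, learning_rate=learning_rate,
                       n_iter=n_iter, random_state=0)
    forest = RandomForestClassifier(n_estimators=trees)
    return Pipeline([('rbm', rbm), ('forest', forest)])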
import numpy as np

# simple_ann and evaluate_classifier are expected to be provided by the
# surrounding module.


def run_arc_test(X_train, y_train, X_test, y_test, params, algo_name):
    # simple_ann expects column-major data: X of shape (n_features, n_samples)
    # and y of shape (1, n_samples).
    X_train_nn = np.array(X_train).T
    y_train_nn = np.array(y_train).reshape(-1, 1).T
    X_test_nn = np.array(X_test).T
    y_test_nn = np.array(y_test).reshape(-1, 1).T
    for layer_dims in params['arcs']:
        layer_dims = list(layer_dims)
        y_score, param_dict, costs = simple_ann.train_network(
            X_train_nn, y_train_nn, X_test_nn, y_test_nn, layer_dims,
            num_iterations=params['num_iter'],
            # integer division: the checkpoint count must be a whole number
            num_checkpoints=params['num_iter'] // params['checkpoints'],
            c_plot=False,
            learning_rate=params['learning_rate'],
            learn_adjust=params['learn_adjust'],
            weights=params['weights'])
        # Score the final checkpoint and threshold the probabilities at 0.5
        _, y_score_p = simple_ann.forward_propagation(
            X_test_nn, param_dict[params['num_iter']], layer_dims)
        y_score = (y_score_p > 0.5).astype(float)
        print('\tPredictions == 1: ', y_score.sum())
        text = '%s_test_%s' % (algo_name, str(layer_dims))
        print('-------------------------------')
        print('\t\t%s\t\t' % text)
        print('-------------------------------')
        evaluate_classifier(np.array(y_test_nn).reshape(1, -1), y_score.reshape(1, -1))
        print('\n\n')
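# Hypothetical call to run_arc_test; every value in params is illustrative,
# but the keys are exactly the ones the function reads above.
params = {
    'arcs': [(30, 16, 1), (30, 32, 16, 1)],  # candidate layer sizes to compare
    'num_iter': 2000,
    'checkpoints': 10,      # train_network keeps num_iter // checkpoints snapshots
    'learning_rate': 0.01,
    'learn_adjust': 0.5,
    'weights': None,
}
run_arc_test(X_train, y_train, X_test, y_test, params, algo_name='simple_ann')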
def evaluate_with_comparison(networks, dataloader, comparison_dataloader=None, **options):
    # comparison_dataloader = get_comparison_dataloader(**options)
    # if comparison_dataloader:
    #     options['fold'] = 'openset_{}'.format(comparison_dataloader.dsf.name)
    options['fold'] = 'openset_{}'.format(options['data_dir'].split('/')[-1])
    if options.get('mode'):
        options['fold'] += '_{}'.format(options['mode'])
    if options.get('aux_dataset'):
        aux_dataset = CustomDataloader(options['aux_dataset'])
        options['fold'] = '{}_{}'.format(options.get('fold'), aux_dataset.dsf.count())
    new_results = evaluation.evaluate_classifier(networks, dataloader, comparison_dataloader, **options)
    if comparison_dataloader is not None:
        openset_results = evaluation.evaluate_openset(networks, dataloader, comparison_dataloader, **options)
        new_results[options['fold']].update(openset_results)
    return new_results[options['fold']]
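# Hedged usage sketch: only 'data_dir' and 'mode' are read directly above; any
# further option keys are consumed by the project's evaluation module. The
# values are illustrative, and networks/dataloader come from the surrounding
# setup code.
options = {'data_dir': 'data/cifar10', 'mode': 'baseline'}
fold_results = evaluate_with_comparison(networks, dataloader, **options)
print(fold_results)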
comparison_dataloader = None
if options['comparison_dataset']:
    comparison_options = options.copy()
    comparison_options['dataset'] = options['comparison_dataset']
    comparison_dataloader = CustomDataloader(last_batch=True, shuffle=False,
                                             **comparison_options)
    comparison_name = options['comparison_dataset'].split('/')[-1].split('.')[0]
    labels_dir = os.path.join(options['result_dir'], 'labels')
    if os.path.exists(labels_dir):
        label_count = len(os.listdir(labels_dir))
    else:
        label_count = 0
    # Hack: ignore the label count
    # options['fold'] = 'openset_{}_{:04d}'.format(comparison_name, label_count)
    options['fold'] = 'openset_{}'.format(comparison_name)

new_results = evaluate_classifier(networks, dataloader, comparison_dataloader, **options)
if options['comparison_dataset']:
    openset_results = evaluate_openset(networks, dataloader, comparison_dataloader, **options)
    pprint(openset_results)
    new_results[options['fold'] + '_openset'] = openset_results
    new_results[options['fold']]['active_learning_label_count'] = label_count
save_evaluation(new_results, options['result_dir'], options['epoch'])
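# save_evaluation is project-specific; a minimal sketch of the behavior assumed
# here (merge the new fold results into a per-epoch JSON file under result_dir):
import json
import os


def save_evaluation(new_results, result_dir, epoch):
    filename = os.path.join(result_dir, 'eval_epoch_{:04d}.json'.format(epoch))
    results = {}
    if os.path.exists(filename):
        with open(filename) as fp:
            results = json.load(fp)
    results.update(new_results)
    with open(filename, 'w') as fp:
        json.dump(results, fp, indent=2)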
import os

import numpy as np
import torch.nn as nn
import torch.optim as optim

# F3Classifier, F3GausianMixtureDensity, ADVLoss, PivotBinaryClassifier,
# Generator, the evaluate_* helpers, DIRECTORY, SEED and TRADE_OFF are expected
# to be provided by the surrounding project.


def run(i_cv):
    N_SIG = 15000
    N_BKG = N_SIG
    N_SAMPLES = N_SIG + N_BKG
    mix = N_SIG / N_SAMPLES
    model_name = 'PivotClassifier'
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)
    print(f'running iter {i_cv}...')
    results = {'i_cv': i_cv}
    seed = SEED + 5 * i_cv
    train_seed = seed
    test_seed = seed + 1

    # Generate training data
    generator = Generator(train_seed)
    z_train = generator.sample_nuisance(N_SIG)
    X_train, y_train = generator.sample_event(z_train, mix, N_SAMPLES)
    z_train = np.concatenate((np.zeros(N_BKG), z_train), axis=0)

    # Define Pivot
    net = F3Classifier(n_in=2, n_out=1)
    adv_net = F3GausianMixtureDensity(n_in=1, n_components=5)
    # net_criterion = nn.CrossEntropyLoss()
    net_criterion = nn.BCEWithLogitsLoss()
    adv_criterion = ADVLoss()
    # ADAM
    # Reducing optimizer inertia with lower beta1 and beta2 helps the density network
    net_optimizer = optim.Adam(net.parameters(), lr=1e-3, betas=(0.5, 0.9))
    adv_optimizer = optim.Adam(adv_net.parameters(), lr=1e-3, betas=(0.5, 0.9))
    # SGD
    # net_optimizer = optim.SGD(net.parameters(), lr=1e-3)
    # adv_optimizer = optim.SGD(adv_net.parameters(), lr=1e-3)
    # model = PivotClassifier(net, adv_net, net_criterion, adv_criterion, TRADE_OFF,
    #                         net_optimizer, adv_optimizer,
    model = PivotBinaryClassifier(net, adv_net, net_criterion, adv_criterion, TRADE_OFF,
                                  net_optimizer, adv_optimizer,
                                  n_net_pre_training_steps=500,
                                  n_adv_pre_training_steps=3000,
                                  n_steps=2000, n_recovery_steps=20,
                                  batch_size=128, rescale=True, cuda=False, verbose=0)

    # Train Pivot
    model.fit(X_train, y_train, z_train)

    # Generate testing data
    generator = Generator(test_seed)
    z_test = generator.sample_nuisance(N_SIG)
    X_test, y_test = generator.sample_event(z_test, mix, N_SAMPLES)

    # Evaluation
    r = evaluate_neural_net(model, prefix='train', model_name=model_name, directory=directory)
    results.update(r)
    evaluate_pivotal(model, generator, prefix='test', model_name=model_name, directory=directory)
    r = evaluate_classifier(model, X_train, y_train, prefix='train',
                            model_name=model_name, directory=directory)
    results.update(r)
    r = evaluate_classifier(model, X_test, y_test, prefix='test',
                            model_name=model_name, directory=directory)
    results.update(r)
    return results
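# ADVLoss is defined elsewhere in the project. One plausible sketch, following
# the "learning to pivot" setup where the adversary maximizes the likelihood of
# the nuisance z under its predicted Gaussian mixture. The shapes are
# assumptions: logits, mu and log_sigma are (batch, n_components) outputs of
# the density network, z is a (batch,) tensor.
import math

import torch
import torch.nn as nn


class ADVLoss(nn.Module):
    def forward(self, logits, mu, log_sigma, z):
        sigma = torch.exp(log_sigma)
        log_w = torch.log_softmax(logits, dim=1)  # mixture weights
        log_norm = (-0.5 * ((z.unsqueeze(1) - mu) / sigma) ** 2
                    - torch.log(sigma) - 0.5 * math.log(2 * math.pi))
        # negative log-likelihood of z under the predicted mixture
        return -torch.logsumexp(log_w + log_norm, dim=1).mean()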
# (fragment: the first line below closed a statement truncated from the
# surrounding file)
#     options['result_dir']))
options = load_options(options)
print("Switching to the most recent version of the network saved in {}".format(
    options['result_dir']))
options['epoch'] = get_current_epoch(options['result_dir'])

print("Loading dataset from file {}".format(options['dataset']))
dataloader = CustomDataloader(last_batch=True, shuffle=False, **options)

print("Loading neural network weights...")
nets = build_networks(dataloader.num_classes, **options)
examples.run_example_code(nets, dataloader, **options)

print("Evaluating the accuracy of the classifier on the {} fold".format(
    options['fold']))
new_results = evaluate_classifier(nets, dataloader, verbose=False, **options)
print("Results from evaluate_classifier:")
pprint(new_results)

acquire_lock(options['result_dir'])
try:
    print("Saving results in {}".format(options['result_dir']))
    filename = os.path.join(options['result_dir'], 'example_results.json')
    with open(filename, 'w') as fp:
        fp.write(json.dumps(new_results, indent=2))
finally:
    release_lock(options['result_dir'])
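# acquire_lock/release_lock are project helpers; a minimal lockfile-based
# sketch of the behavior assumed here (serialize concurrent writers to
# result_dir):
import errno
import os
import time


def acquire_lock(result_dir, timeout=60):
    lock_path = os.path.join(result_dir, '.lock')
    deadline = time.time() + timeout
    while True:
        try:
            # O_EXCL makes creation atomic: only one process gets the lock
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            os.close(fd)
            return
        except OSError as exc:
            if exc.errno != errno.EEXIST or time.time() > deadline:
                raise
            time.sleep(0.1)


def release_lock(result_dir):
    os.remove(os.path.join(result_dir, '.lock'))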
lr_adasyn = train_classifier(X_train, y_train, log_reg, lr_adasyn_params, "ADASYN")


# In[110]:


y_test = np.array(y_test).reshape(1, -1)


# ##### Evaluation Logistic Regression + SMOTE

# In[111]:


y_lr_smote_pred = lr_smote.predict(X_test).reshape(1, -1)
evaluate_classifier(y_test, y_lr_smote_pred)


# ##### Evaluation Logistic Regression + Borderline

# In[112]:


y_lr_borderline_pred = lr_borderline.predict(X_test).reshape(1, -1)
evaluate_classifier(y_test, y_lr_borderline_pred)


# ##### Evaluation Logistic Regression + ADASYN

# In[113]:
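# evaluate_classifier in this notebook takes label arrays directly (both
# reshaped to (1, n) above). A minimal sketch of such a helper using
# scikit-learn metrics; the real one is defined or imported elsewhere in the
# notebook:
from sklearn.metrics import classification_report, confusion_matrix


def evaluate_classifier(y_true, y_pred):
    y_true, y_pred = np.ravel(y_true), np.ravel(y_pred)
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, digits=4))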
import numpy as np
import read_dataset as rd
import evaluation as e
from sklearn.ensemble import RandomForestClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20; the replacement
# lives in sklearn.model_selection
from sklearn.model_selection import cross_val_score

# loading training data
print('Loading training data')
X, y = rd.read_train()
X, y = rd.nudge_dataset(X, y)

scores = []
scores_std = []

# just so we know it didn't blow up or something
print('Start learning...')

# The last few might be excessive.
forests = [10, 15, 20, 25, 30, 40, 50, 70, 100, 125, 150, 175, 200, 250]
for tree in forests:
    print("This forest has {} trees!".format(tree))
    classifier = RandomForestClassifier(n_estimators=tree)
    # score = cross_val_score(classifier, X, y)
    # scores.append(np.mean(score))
    # scores_std.append(np.std(score))
    name = "plots_extended/RandomForest_{}_trees.png".format(tree)
    e.evaluate_classifier(classifier, X, y, name=name)

# print('Score: ', np.array(scores))
# print('Std : ', np.array(scores_std))
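# If the commented-out scoring above is revived, the modern call looks like
# this (illustrative; cv=3 matches the old cross_validation default):
score = cross_val_score(RandomForestClassifier(n_estimators=30), X, y, cv=3)
print('mean={:.3f} std={:.3f}'.format(np.mean(score), np.std(score)))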