Example No. 1
                 epochs=adv_numberOfEpochs)   # tail of a fit() call truncated in this excerpt

        # Adversarial training
        losses = {"L_f": [], "L_r": [], "L_f - L_r": []}
        batch_size = 128

        num_epochs = 200
        for i in range(num_epochs):
            print(i)

            l = DRf.evaluate(test_x, [test_y, df_Convert_v2_Test])
            losses["L_f - L_r"].append(l[0][None][0])
            losses["L_f"].append(l[1][None][0])
            losses["L_r"].append(-l[2][None][0])
            print(losses["L_r"][-1] / lam)

            plot_losses(i, losses, lam, num_epochs)

            # Fit "model"
            model.trainable = True
            advmodel.trainable = False

            model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
            DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
            DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

            indices = np.random.permutation(len(train_x))[:batch_size]
            # model.train_on_batch(train_x.iloc[indices], train_y.iloc[indices])
            DRf.train_on_batch(train_x.iloc[indices], [train_y.iloc[indices], df_Convert_v2.iloc[indices]])

            # Fit "advmodel"
					nJets_binned_bkg,
					callbacks=[earlystop2],
					epochs = adv_numberOfEpochs)

		# Adversarial training
		num_epochs = 200
		for i in range(num_epochs):
			print('Adversarial training epoch: ', i + 1)

			l = DRf.evaluate([test_x, test_x.dM_Go_LSP], [test_y, nJets_binned_test])
			losses["L_f - L_r"].append(l[0][None][0])
			losses["L_f"].append(l[1][None][0])
			losses["L_r"].append(-l[2][None][0])
			print(losses["L_r"][-1] / lam)

			plot_losses(i, losses, lam, num_epochs, 'Losses_nJets_PAT_noLO_HT_lambda'+str(lam))

			#Fit "model"
			model.trainable = True
			advmodel.trainable = False

			model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
			DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
			DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

			indices = np.random.permutation(len(train_x))[:batch_size]
			DRf.train_on_batch([train_x.iloc[indices], train_x.dM_Go_LSP.iloc[indices]], [train_y.iloc[indices], nJets_binned.iloc[indices]])

			#Fit "advmodel"
			if lam >= 0.0:
				model.trainable = False
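
The compile calls above rely on the loss factories make_loss_model and make_loss_advmodel, which are not shown in this excerpt. Below is a minimal sketch of what such factories typically look like in this kind of adversarial setup, where the c argument scales the loss so that c=-lam turns the adversary term into a penalty inside DRf. The choice of binary/categorical cross-entropy is an assumption, not the original code.

from tensorflow.keras import backend as K

# Hedged sketch of the weighted loss factories assumed by the compile calls above.
def make_loss_model(c):
    def loss_model(y_true, y_pred):
        # Classifier loss L_f, scaled by c
        return c * K.binary_crossentropy(y_true, y_pred)
    return loss_model

def make_loss_advmodel(c):
    def loss_advmodel(z_true, z_pred):
        # Adversary loss L_r (assumed: categorical cross-entropy over nJets bins),
        # scaled by c; with c=-lam this becomes the -lam*L_r term in DRf
        return c * K.categorical_crossentropy(z_true, z_pred)
    return loss_advmodel
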
Example No. 3
        print('Epoch mean loss: ', round(np.mean(epoch_losses), 5))
        for (x_batch, y_batch) in val_data_batched:
            classifier_model.compile(loss=make_disco_loss(
                x_batch.numpy()[:, decorr_var_col]),
                                     optimizer=opt_model,
                                     metrics=['accuracy'])
            val_loss, val_acc = classifier_model.evaluate(x_batch,
                                                          y_batch,
                                                          batch_size=batchSize)
            val_epoch_losses.append(val_loss)
            tf.keras.backend.clear_session()
        print('Epoch validation mean loss: ',
              round(np.mean(val_epoch_losses), 5))
        losses["L_t"].append(np.mean(epoch_losses))
        losses["L_v"].append(np.mean(val_epoch_losses))
        plot_losses(epoch, losses, lam, numberOfEpochs,
                    'Losses_DisCo_dilepton_lambda_' + str(lam))
        tf.keras.backend.clear_session()
        epoch_end_time = time.time()
        print('Epoch time elapsed: ',
              np.round((epoch_end_time - epoch_start_time), 3))
        print('End of epoch: ', epoch + 1)
        print('-------------')

    print(' - Test set ROC AUC: ',
          round(roc_auc_score(test_y, classifier_model.predict(test_x)), 4))

    # Save the model
    save_path = '/work/kimmokal/susyDNN/models/'
    save_name = 'susyDNN_DisCo_dilepton_nJets_lambda' + str(lam) + '_' + str(
        mass_point)
    classifier_model.save(save_path + save_name + '.h5')
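
make_disco_loss above is recompiled for every batch with that batch's decorrelation variable, but the helper itself is not shown. Below is a minimal sketch of a loss factory of this kind, combining binary cross-entropy with a distance-correlation (DisCo) penalty. The lam keyword, the unweighted squared distance correlation, and applying the penalty to all events (rather than background only) are assumptions; the per-batch decorrelation values must match the evaluated batch.

import tensorflow as tf

def _distance_corr_sq(x, y):
    # Squared sample distance correlation between two 1-D tensors,
    # computed from double-centred pairwise-distance matrices.
    x = tf.reshape(x, [-1, 1])
    y = tf.reshape(y, [-1, 1])
    a = tf.abs(x - tf.transpose(x))
    b = tf.abs(y - tf.transpose(y))
    A = a - tf.reduce_mean(a, axis=0, keepdims=True) \
          - tf.reduce_mean(a, axis=1, keepdims=True) + tf.reduce_mean(a)
    B = b - tf.reduce_mean(b, axis=0, keepdims=True) \
          - tf.reduce_mean(b, axis=1, keepdims=True) + tf.reduce_mean(b)
    dcov2 = tf.reduce_mean(A * B)
    dvar_x = tf.reduce_mean(A * A)
    dvar_y = tf.reduce_mean(B * B)
    return dcov2 / (tf.sqrt(dvar_x * dvar_y) + 1e-12)

def make_disco_loss(decorr_var, lam=1.0):
    # decorr_var: per-event values of the variable to decorrelate against
    # (e.g. nJets) for the current batch, as a numpy array.
    decorr = tf.constant(decorr_var, dtype=tf.float32)

    def disco_loss(y_true, y_pred):
        bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
        score = tf.reshape(y_pred, [-1])
        return tf.reduce_mean(bce) + lam * _distance_corr_sq(decorr, score)

    return disco_loss
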
Example No. 4
                                     hist_dnn_output_njet_6to8)
        js_distances["JS1"].append(js1)
        js_distances["JS2"].append(js2)
        print('DNN output Jensen-Shannon distance (nJet = [6,7,8] vs. nJet = [4,5]): '
              + str(js1))
        print('DNN output Jensen-Shannon distance (nJet >= 9 vs. nJet = [6,7,8]): '
              + str(js2))

        ### Save losses and plot
        l = DRf.evaluate(test_x, [test_y, nJets_binned_test], batch_size=512)
        losses["L_f - L_r"].append(l[0][None][0])
        losses["L_f"].append(l[1][None][0])
        losses["L_r"].append(-l[2][None][0])
        print("Loss: " + str(losses["L_r"][-1] / lam))

        plot_losses(i, losses, lam, num_epochs,
                    'Losses_adversarial_' + model_name + '_lambda_' + str(lam))
        plot_jensenshannon(i, js_distances, lam, num_epochs,
                           'JS_distance_' + model_name + '_lambda_' + str(lam))
        plot_inefficiencies(
            i, inefficiencies_compressed, inefficiencies_uncompressed, lam,
            num_epochs, 'Inefficiencies_' + model_name + '_lambda' + str(lam))

        roc_aoc = 1 - round(roc_auc_score(test_y, model.predict(test_x)), 4)
        print('ROC area over curve: ' + str(roc_aoc))

        # Save the metrics for the best models to a .csv file
        if (js1 <= 0.08 and js2 <= 0.08 and sig_uncompressed_ineff <= 0.7
                and sig_compressed_ineff <= 0.8
                and bkg_uncompressed_ineff <= 0.1):
            metrics_path = save_path + model_name + '_best_metrics.txt'
            metrics_to_csv(metrics_path, i + 1, js1, js2,  # argument list truncated in this excerpt
        print('Epoch mean loss: ', round(np.mean(epoch_losses), 5))
        for (x_batch, y_batch) in val_data_batched:
            classifier_model.compile(loss=make_disco_loss(
                x_batch.numpy()[:, decorr_var_col]),
                                     optimizer=opt_model,
                                     metrics=['accuracy'])
            val_loss, val_acc = classifier_model.evaluate(x_batch,
                                                          y_batch,
                                                          batch_size=batchSize)
            val_epoch_losses.append(val_loss)
            tf.keras.backend.clear_session()
        print('Epoch validation mean loss: ',
              round(np.mean(val_epoch_losses), 5))
        losses["L_t"].append(np.mean(epoch_losses))
        losses["L_v"].append(np.mean(val_epoch_losses))
        plot_losses(epoch, losses, lam, numberOfEpochs,
                    'Losses_DisCo_all_bkg_lambda_' + str(lam))
        tf.keras.backend.clear_session()
        epoch_end_time = time.time()
        print('Epoch time elapsed: ',
              np.round((epoch_end_time - epoch_start_time), 3))
        print('End of epoch: ', epoch + 1)
        print('-------------')

    print(' - Test set ROC AUC: ',
          round(roc_auc_score(test_y, classifier_model.predict(test_x)), 4))

    # Save the model
    save_path = '/work/kimmokal/susyDNN/models/'
    save_name = 'susyDNN_DisCo_all_bkg_nJets_lambda' + str(lam) + '_' + str(
        mass_point)
    classifier_model.save(save_path + save_name + '.h5')
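
The DisCo training loops above iterate over val_data_batched and pull numpy arrays out of each batch with .numpy(), which suggests a tf.data pipeline in eager mode. Below is a minimal sketch of how such a batched validation set might be built; the array names val_x and val_y and their shapes are hypothetical stand-ins.

import numpy as np
import tensorflow as tf

# Hypothetical stand-ins for the real validation arrays.
val_x = np.random.rand(4096, 20).astype('float32')              # assumption: 20 input features
val_y = np.random.randint(0, 2, size=(4096, 1)).astype('float32')

batchSize = 1024  # assumption: same batch size as used in evaluate() above
val_data_batched = tf.data.Dataset.from_tensor_slices((val_x, val_y)).batch(batchSize)

# In eager mode each batch is a pair of EagerTensors, so x_batch.numpy()
# works exactly as in the loops above.
for (x_batch, y_batch) in val_data_batched:
    print(x_batch.shape, y_batch.numpy().mean())
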
    print "Signal Inefficiency Uncompressed: " + str(Signal_Uncompressed_Ineff)
    print "Bkg Inefficiency Uncompressed: " + str(Bkg_Uncompressed_Ineff)
    #///////////////////////#

    ### Save losses and plot
    l = DRf.evaluate(test_x, [test_y, nJets_binned_test])
    losses["L_f - L_r"].append(l[0][None][0])
    losses["L_f"].append(l[1][None][0])
    losses["L_r"].append(-l[2][None][0])
    print("Loss L_r: " + str(losses["L_r"][-1] / lam))
    print("Loss L_f: " + str(losses["L_f"][-1]))

    print(" ")

    if (i == num_epochs - 1):
        plot_losses(i, losses, lam, num_epochs,
                    'Losses_adversarial_reduced_bkg_lambda' + str(lam))
        plot_jensenshannon(i, js_distances, lam, num_epochs,
                           'JS_distance_reduced_bkg_lambda' + str(lam))
        plot_Inefficiencies(i, inefficiencies_Compressed,
                            inefficiencies_Uncompressed, lam, num_epochs,
                            'Inefficiencies_reduced_bkg_lambda' + str(lam))

    save_name = 'susyDNN_adv_model_reduced_bkg_lambda_' + str(lam) + "_" + str(
        i)
    model.save(save_path2 + save_name + '.h5')

print('- - - - - - -')
print(' - Second test set ROC AUC: ',
      round(roc_auc_score(test_y, model.predict(test_x)), 4))

# Check decorrelation for test set
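
The Jensen-Shannon distances js1 and js2 printed above compare histograms of the DNN output in different nJet regions. Below is a minimal sketch of how such a distance can be computed from two sets of classifier scores with scipy.spatial.distance.jensenshannon; the helper name and the binning are assumptions.

import numpy as np
from scipy.spatial.distance import jensenshannon

def js_distance(scores_a, scores_b, n_bins=20):
    # Histogram the classifier outputs (which live in [0, 1]) with a common
    # binning; jensenshannon() normalises the histograms internally.
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    hist_a, _ = np.histogram(scores_a, bins=bins)
    hist_b, _ = np.histogram(scores_b, bins=bins)
    return jensenshannon(hist_a, hist_b)

# Example with random stand-in scores:
rng = np.random.default_rng(0)
print(js_distance(rng.uniform(size=1000), rng.uniform(size=1000)))
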
Example No. 7
					nJets_binned_bkg,
					callbacks=[earlystop2],
					epochs = adv_numberOfEpochs)

		# Adversarial training
		num_epochs = 200
		for i in range(num_epochs):
			print('Adversarial training epoch: ', i + 1)

			l = DRf.evaluate(test_x, [test_y, nJets_binned_test])
			losses["L_f - L_r"].append(l[0][None][0])
			losses["L_f"].append(l[1][None][0])
			losses["L_r"].append(-l[2][None][0])
			print(losses["L_r"][-1] / lam)

			plot_losses(i, losses, lam, num_epochs, 'Losses_nJets_dilepton_lambda'+str(lam))

			#Fit "model"
			model.trainable = True
			advmodel.trainable = False

			model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
			DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
			DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

			indices = np.random.permutation(len(train_x))[:batch_size]
			DRf.train_on_batch(train_x.iloc[indices], [train_y.iloc[indices], nJets_binned.iloc[indices]])

			#Fit "advmodel"
			if lam >= 0.0:
				model.trainable = False
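
For context, the alternating updates in these excerpts assume the usual pivot-style wiring, in which the classifier model, the adversary advmodel, and the combined networks DRf and DfR all share the same inputs. Below is a minimal sketch of that wiring with the Keras functional API; the layer sizes, the number of input features, and the number of adversary output bins are assumptions, not the original architecture.

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

n_features = 20   # assumption: number of input features
n_adv_bins = 3    # assumption: number of nJets bins predicted by the adversary

inputs = Input(shape=(n_features,))

# Classifier f: signal vs. background score
clf_hidden = Dense(64, activation='relu')(inputs)
clf_output = Dense(1, activation='sigmoid', name='f')(clf_hidden)
model = Model(inputs=[inputs], outputs=[clf_output])

# Adversary r: tries to infer the nJets bin from the classifier output
adv_hidden = Dense(32, activation='relu')(model(inputs))
adv_output = Dense(n_adv_bins, activation='softmax', name='r')(adv_hidden)
advmodel = Model(inputs=[inputs], outputs=[adv_output])

# Combined models used in the alternating loop:
#   DRf minimises L_f - lam * L_r  (classifier step)
#   DfR minimises L_r alone        (adversary step)
DRf = Model(inputs=[inputs], outputs=[model(inputs), advmodel(inputs)])
DfR = Model(inputs=[inputs], outputs=[advmodel(inputs)])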