def plot_ROC(y_test, yhat, ip3d, MODEL_FILE):
    '''
    Plot the b-vs-light ROC curves of IP3D and of the model discriminant,
    and pickle the model's curve to MODEL_FILE + '.pkl'.

    Args:
    -----
        y_test: the truth labels for the test set; each entry is reduced to a
                class index with np.argmax (presumably one-hot rows -- TODO
                confirm)
        yhat: the predicted probabilities of each class in the test set,
              indexed as yhat[:, class]; columns 0 and 2 are used
        ip3d: object exposing jet_ip3d_pb and jet_ip3d_pu whose ratio supports
              `.values` and boolean-mask indexing (presumably a pandas
              DataFrame -- TODO confirm)
        MODEL_FILE: label of the model's curve and stem of the pickle file

    Returns:
    --------
        whatever viz.ROC_plotter returns (a mpl.figure according to the
        original documentation)
    '''
    from viz import calculate_roc, ROC_plotter, add_curve
    import cPickle

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])
    # -- for b VS. light (class 2 is treated as signal below)
    bl_sel = (y == 0) | (y == 2)

    # -- log-ratio discriminants, computed once instead of once per use
    ip3d_disc = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    model_disc = np.log(yhat[:, 2] / yhat[:, 0])
    # -- keep only selected entries whose log-ratio is finite
    ip3d_keep = bl_sel & np.isfinite(ip3d_disc.values)
    model_keep = bl_sel & np.isfinite(model_disc)

    # -- add ROC curves
    discs = {}
    add_curve(r'IP3D', 'black',
              calculate_roc(y[ip3d_keep] == 2, ip3d_disc[ip3d_keep]),
              discs)
    add_curve(MODEL_FILE, 'blue',
              calculate_roc(y[model_keep] == 2, model_disc[model_keep]),
              discs)

    # -- print(...) with a single string behaves the same on Python 2 and 3
    print('Pickling ROC curves')
    # -- the context manager guarantees the file is flushed and closed
    with open(MODEL_FILE + '.pkl', 'wb') as pkl_file:
        cPickle.dump(discs[MODEL_FILE], pkl_file, cPickle.HIGHEST_PROTOCOL)

    print('Plotting')
    fg = ROC_plotter(discs, title=r'Impact Parameter Taggers',
                     min_eff=0.5, max_eff=1.0, logscale=True)
    return fg
def performance(yhat, test, iptagger):
    '''
    Compare the combined tagger with MV2c10 on b-vs-light and b-vs-c ROC
    curves; pickle the curves and save the plots.

    Output files (written to the current directory):
        ROC_MV2c10+<iptagger>_<model_id>_bl.pkl / _bl.pdf
        ROC_MV2c10+<iptagger>_<model_id>_bc.pkl / _bc.pdf

    Args:
    -----
        yhat: predicted class probabilities, indexed as yhat[:, class];
              columns 0, 1 and 2 are used
        test: mapping with entries 'y' (flavor labels, compared with 0, 4
              and 5) and 'mv2c10' (the MV2c10 discriminant); both must
              support boolean-mask indexing
        iptagger: name of the IP tagger, used in curve labels and file names

    Relies on the module-level names model_id, add_curve, calculate_roc,
    ROC_plotter and cPickle.
    '''
    y = test['y']
    mv2c10 = test['mv2c10']
    label = r'MV2c10+' + iptagger
    stem = 'ROC_MV2c10+' + iptagger + '_' + model_id

    # -- Find flavors after applying cuts:
    bl_sel = (y == 5) | (y == 0)
    bc_sel = (y == 5) | (y == 4)

    # -- b VS. light: log(yhat[:, 2] / yhat[:, 0]), finite entries only
    bl_disc = np.log(yhat[:, 2] / yhat[:, 0])[bl_sel]
    fin1 = np.isfinite(bl_disc)
    bl_curves = {}
    add_curve(label, 'green',
              calculate_roc(y[bl_sel][fin1] == 5, bl_disc[fin1]),
              bl_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(y[bl_sel] == 5, mv2c10[bl_sel]),
              bl_curves)
    # -- the context manager guarantees the file is flushed and closed
    with open(stem + '_bl.pkl', 'wb') as pkl_file:
        cPickle.dump(bl_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)
    fg = ROC_plotter(bl_curves, title=r'DL1 + IP Taggers Combination',
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=10000)
    fg.savefig(stem + '_bl.pdf')

    # -- b VS. c: log(yhat[:, 2] / yhat[:, 1]), finite entries only
    bc_disc = np.log(yhat[:, 2] / yhat[:, 1])[bc_sel]
    fin1 = np.isfinite(bc_disc)
    bc_curves = {}
    add_curve(label, 'green',
              calculate_roc(y[bc_sel][fin1] == 5, bc_disc[fin1]),
              bc_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(y[bc_sel] == 5, mv2c10[bc_sel]),
              bc_curves)
    with open(stem + '_bc.pkl', 'wb') as pkl_file:
        cPickle.dump(bc_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)
    fg = ROC_plotter(bc_curves, title=r'DL1 + IP Taggers Combination',
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=100)
    fg.savefig(stem + '_bc.pdf')
def plot_ROC(y_test, yhat, ip3d, MODEL_FILE):
    '''
    Plot the b-vs-light ROC curves of IP3D and of the model discriminant,
    and pickle the model's curve to MODEL_FILE + '.pkl'.

    Args:
    -----
        y_test: the truth labels for the test set; each entry is reduced to a
                class index with np.argmax (presumably one-hot rows -- TODO
                confirm)
        yhat: the predicted probabilities of each class in the test set,
              indexed as yhat[:, class]; columns 0 and 2 are used
        ip3d: object exposing jet_ip3d_pb and jet_ip3d_pu whose ratio supports
              `.values` and boolean-mask indexing (presumably a pandas
              DataFrame -- TODO confirm)
        MODEL_FILE: label of the model's curve and stem of the pickle file

    Returns:
    --------
        whatever viz.ROC_plotter returns (a mpl.figure according to the
        original documentation)

    Relies on the module-level name cPickle.
    '''
    from viz import calculate_roc, ROC_plotter, add_curve

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])
    # -- for b VS. light (class 2 is treated as signal below)
    bl_sel = (y == 0) | (y == 2)

    # -- IP3D: log(pb / pu), restricted to selected entries with finite values
    ip3d_llr = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    ip3d_mask = bl_sel & np.isfinite(ip3d_llr.values)
    # -- model: log(yhat[:, 2] / yhat[:, 0]), same treatment
    model_llr = np.log(yhat[:, 2] / yhat[:, 0])
    model_mask = bl_sel & np.isfinite(model_llr)

    # -- add ROC curves
    discs = {}
    add_curve(r'IP3D', 'black',
              calculate_roc(y[ip3d_mask] == 2, ip3d_llr[ip3d_mask]),
              discs)
    add_curve(MODEL_FILE, 'blue',
              calculate_roc(y[model_mask] == 2, model_llr[model_mask]),
              discs)

    # -- print(...) with a single string behaves the same on Python 2 and 3
    print('Pickling ROC curves')
    # -- close the pickle file deterministically instead of leaking the handle
    with open(MODEL_FILE + '.pkl', 'wb') as pkl_file:
        cPickle.dump(discs[MODEL_FILE], pkl_file, cPickle.HIGHEST_PROTOCOL)

    print('Plotting')
    fg = ROC_plotter(discs, title=r'Impact Parameter Taggers',
                     min_eff=0.5, max_eff=1.0, logscale=True)
    return fg
def plot_roc(yhat, data, model_name):
    '''
    Plot one ROC curve per non-background class (that class vs. 'bkg') and
    pickle all of the curves.

    Args:
        yhat: an ndarray of the probability of each event for each class,
              indexed as yhat[event, class]
        data: dictionary containing 'y_test' (encoded class labels),
              'w_test' (event weights) and 'LabelEncoder' (encoder whose
              classes_ contain 'bkg')
        model_name: string used in the names of the output files
    Returns:
        None. Files written to the current directory:
        plot: 'roc_<class>_<model_name>.pdf' for every non-background class
        pickle file: '<model_name>.pkl', a dictionary holding each curve,
              keyed by class name
    '''
    # -- hardcoded in from cutflow!! extract them instead
    # -- (indexed by encoded class label, see the scatter call below)
    cutflow_eff = [0.0699191919192, 0.0754639175258, 0.08439,
                   0.0921212121212, 0.110275510204, 0.00484432269559]

    y_test = data['y_test']
    w_test = data['w_test']
    le = data['LabelEncoder']
    bkg_col = np.argwhere(le.classes_ == 'bkg')[0][0]

    pkl_dict = {}
    classes = np.unique(y_test)
    for k in classes[classes != bkg_col]:
        k_string = le.inverse_transform(k)
        selection = (y_test == k) | (y_test == bkg_col)

        # -- discriminant: log(p_k / p_bkg) on the selected events; entries
        # -- with a non-finite log-ratio are dropped
        yhat_sel = yhat[selection]
        disc = np.log(yhat_sel[:, k] / yhat_sel[:, bkg_col])
        finite = np.isfinite(disc)

        curves = {}
        add_curve('DNN', 'black',
                  calculate_roc(
                      y_test[selection][finite] == k,
                      disc[finite],
                      weights=w_test[selection][finite]
                  ),
                  curves)
        # -- every class stores its curve under 'DNN'; key the pickle by
        # -- class name so that later classes do not overwrite earlier ones
        pkl_dict[k_string] = curves['DNN']

        fig = ROC_plotter(
            curves,
            title=k_string + r' vs. Sherpa $\gamma \gamma$ Background',
            min_eff=0.05, max_eff=1.0, ymax=500, logscale=False)
        # -- overlay the cutflow working point for comparison
        plt.scatter(cutflow_eff[k], 1. / cutflow_eff[bkg_col],
                    label='Cutflow ' + k_string)
        plt.legend()
        matplotlib.rcParams.update({'font.size': 16})
        fig.savefig('roc_' + k_string + '_' + model_name + '.pdf')

    # -- the context manager guarantees the file is flushed and closed
    with open(model_name + '.pkl', 'wb') as pkl_file:
        cPickle.dump(pkl_dict, pkl_file)
def performance(yhat, test, iptagger):
    '''
    Compare the combined tagger with MV2c10 on b-vs-light ('bl') and b-vs-c
    ('bc') ROC curves; for each comparison pickle the curves and save the
    plot.

    Output files (written to the current directory), for <tag> in bl, bc:
        ROC_MV2c10+<iptagger>_<model_id>_<tag>.pkl
        ROC_MV2c10+<iptagger>_<model_id>_<tag>.pdf

    Args:
    -----
        yhat: predicted class probabilities, indexed as yhat[:, class];
              columns 0, 1 and 2 are used
        test: mapping with entries 'y' (flavor labels, compared with 0, 4
              and 5) and 'mv2c10' (the MV2c10 discriminant); both must
              support boolean-mask indexing
        iptagger: name of the IP tagger, used in curve labels and file names

    Relies on the module-level names model_id, add_curve, calculate_roc,
    ROC_plotter and cPickle.
    '''
    # -- Find flavors after applying cuts:
    bl_sel = (test['y'] == 5) | (test['y'] == 0)
    bc_sel = (test['y'] == 5) | (test['y'] == 4)

    out_stem = 'ROC_MV2c10+' + iptagger + '_' + model_id

    # -- (file tag, selection, yhat column in the denominator, ymax of plot)
    comparisons = (
        ('bl', bl_sel, 0, 10000),
        ('bc', bc_sel, 1, 100),
    )
    for tag, sel, denom_col, ymax in comparisons:
        # -- log-ratio discriminant on the selection, computed once
        disc = np.log(yhat[:, 2] / yhat[:, denom_col])[sel]
        fin1 = np.isfinite(disc)

        curves = {}
        add_curve(r'MV2c10+' + iptagger, 'green',
                  calculate_roc(test['y'][sel][fin1] == 5, disc[fin1]),
                  curves)
        add_curve(r'MV2c10', 'red',
                  calculate_roc(test['y'][sel] == 5, test['mv2c10'][sel]),
                  curves)

        # -- close the pickle file deterministically instead of leaking it
        with open(out_stem + '_' + tag + '.pkl', 'wb') as pkl_file:
            cPickle.dump(curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

        fg = ROC_plotter(curves, title=r'DL1 + IP Taggers Combination',
                         min_eff=0.5, max_eff=1.0, logscale=True, ymax=ymax)
        fg.savefig(out_stem + '_' + tag + '.pdf')
def performance(yhat, y, mv2c10, iptagger, extratitle=''):
    '''
    Compare DL1 with MV2c10 on b-vs-light and b-vs-c ROC curves, pickle the
    curves, save the plots and report the rejections at the point of each
    curve whose efficiency is closest to 0.7.

    Output files (written to the current directory), for <tag> in bl, bc:
        ROC_<iptagger>_<MODEL_NAME>_genprova_<tag>.pkl
        ROC_<iptagger>_<MODEL_NAME>_<extratitle>_genprova_<tag>.pdf

    Args:
    -----
        yhat: predicted class probabilities, indexed as yhat[:, class];
              columns 0, 1 and 2 are used
        y: flavor labels (compared with 0, 4 and 5); must support
           boolean-mask indexing
        mv2c10: the MV2c10 discriminant, aligned with y
        iptagger: name of the IP tagger, used in curve labels and file names
        extratitle: appended to the plot title, used in the pdf file names
                    and as the key of the returned dictionary

    Returns:
    --------
        {extratitle: {'DL1_70_bl': ..., 'DL1_70_bc': ...,
                      'MV2_70_bl': ..., 'MV2_70_bc': ...}}
        where each value is the 'rejection' entry of the corresponding curve
        at the index whose 'efficiency' is nearest to 0.7

    Relies on the module-level names MODEL_NAME, add_curve, calculate_roc,
    ROC_plotter, cPickle and plt.
    '''
    dl1_label = r'DL1' + iptagger
    stem = 'ROC_' + iptagger + '_' + MODEL_NAME

    # -- Find flavors after applying cuts:
    bl_sel = (y == 5) | (y == 0)
    bc_sel = (y == 5) | (y == 4)

    # -- b VS. light: log(yhat[:, 2] / yhat[:, 0]), finite entries only
    bl_disc = np.log(yhat[:, 2] / yhat[:, 0])[bl_sel]
    fin1 = np.isfinite(bl_disc)
    bl_curves = {}
    add_curve(dl1_label, 'green',
              calculate_roc(y[bl_sel][fin1] == 5, bl_disc[fin1]),
              bl_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(y[bl_sel] == 5, mv2c10[bl_sel]),
              bl_curves)
    # -- the context manager guarantees the file is flushed and closed
    with open(stem + '_genprova_bl.pkl', 'wb') as pkl_file:
        cPickle.dump(bl_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)
    fg = ROC_plotter(bl_curves, title=r'DL1 vs MV2c10 ' + extratitle,
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=10000)
    fg.savefig(stem + '_' + extratitle + '_genprova_bl.pdf')
    # -- close this figure too; previously only the b VS. c one was closed,
    # -- so one figure leaked per call
    plt.close(fg)

    # -- b VS. c: log(yhat[:, 2] / yhat[:, 1]), finite entries only
    bc_disc = np.log(yhat[:, 2] / yhat[:, 1])[bc_sel]
    fin1 = np.isfinite(bc_disc)
    bc_curves = {}
    add_curve(dl1_label, 'green',
              calculate_roc(y[bc_sel][fin1] == 5, bc_disc[fin1]),
              bc_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(y[bc_sel] == 5, mv2c10[bc_sel]),
              bc_curves)
    with open(stem + '_genprova_bc.pkl', 'wb') as pkl_file:
        cPickle.dump(bc_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)
    fg = ROC_plotter(bc_curves, title=r'DL1 vs MV2c10 ' + extratitle,
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=100)
    fg.savefig(stem + '_' + extratitle + '_genprova_bc.pdf')
    plt.close(fg)

    def find_nearest(array, value):
        # -- index of the element of array closest to value
        return (np.abs(array - value)).argmin()

    def rejection_at_70(curve):
        # -- rejection at the point whose efficiency is nearest to 0.7
        return curve['rejection'][find_nearest(curve['efficiency'], 0.7)]

    return {extratitle: {
        'DL1_70_bl': rejection_at_70(bl_curves[dl1_label]),
        'DL1_70_bc': rejection_at_70(bc_curves[dl1_label]),
        'MV2_70_bl': rejection_at_70(bl_curves[r'MV2c10']),
        'MV2_70_bc': rejection_at_70(bc_curves[r'MV2c10'])
    }}
def plot_ROC(y_test, yhat, ip3d, run_name):
    '''
    Build b-vs-light and b-vs-c ROC curves for IP3D and for the RNNIP
    discriminant, pickle the RNNIP curves and save both plots.

    Output files:
        roc_pickles/<run_name>_bl.pkl, roc_pickles/<run_name>_bc.pkl
        plots/roc<run_name>.pdf, plots/roc<run_name>_bc.pdf

    Args:
    -----
        y_test: the truth labels for the test set; each entry is reduced to a
                class index with np.argmax (presumably one-hot rows -- TODO
                confirm)
        yhat: the predicted probabilities of each class in the test set,
              indexed as yhat[:, class]; columns 0, 1 and 2 are used
        ip3d: object exposing jet_ip3d_pb, jet_ip3d_pu and jet_ip3d_pc whose
              ratios support `.values` and boolean-mask indexing (presumably
              a pandas DataFrame -- TODO confirm)
        run_name: string used in the names of the output files

    Relies on the module-level names logging, os, cPickle and safe_mkdir.
    '''
    from viz import calculate_roc, ROC_plotter, add_curve
    logger = logging.getLogger("plot ROC")

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])
    # -- for b VS. light
    bl_sel = (y == 0) | (y == 2)
    # -- for b VS. c
    bc_sel = (y == 1) | (y == 2)

    # -- b VS. light discriminants, each computed once; only selected
    # -- entries with a finite log-ratio are kept
    ip3d_bl = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    ip3d_bl_keep = bl_sel & np.isfinite(ip3d_bl.values)
    rnn_bl = np.log(yhat[:, 2] / yhat[:, 0])
    rnn_bl_keep = bl_sel & np.isfinite(rnn_bl)

    # -- add ROC curves
    discs = {}
    add_curve(r'IP3D', 'black',
              calculate_roc(y[ip3d_bl_keep] == 2, ip3d_bl[ip3d_bl_keep]),
              discs)
    add_curve('RNNIP', 'blue',
              calculate_roc(y[rnn_bl_keep] == 2, rnn_bl[rnn_bl_keep]),
              discs)

    # -- b VS. c discriminants, same treatment
    ip3d_bc = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pc)
    ip3d_bc_keep = bc_sel & np.isfinite(ip3d_bc.values)
    rnn_bc = np.log(yhat[:, 2] / yhat[:, 1])
    rnn_bc_keep = bc_sel & np.isfinite(rnn_bc)

    discs_bc = {}
    add_curve(r'IP3D', 'black',
              calculate_roc(y[ip3d_bc_keep] == 2, ip3d_bc[ip3d_bc_keep]),
              discs_bc)
    add_curve('RNNIP', 'blue',
              calculate_roc(y[rnn_bc_keep] == 2, rnn_bc[rnn_bc_keep]),
              discs_bc)

    logger.info('Pickling ROC curves')
    safe_mkdir('roc_pickles')
    # -- context managers guarantee the files are flushed and closed
    with open(os.path.join('roc_pickles', run_name + '_bl.pkl'), 'wb') as f:
        cPickle.dump(discs['RNNIP'], f, cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join('roc_pickles', run_name + '_bc.pkl'), 'wb') as f:
        cPickle.dump(discs_bc['RNNIP'], f, cPickle.HIGHEST_PROTOCOL)

    logger.info('Plotting')
    safe_mkdir('plots')
    fg = ROC_plotter(discs, title=r'Impact Parameter Taggers',
                     min_eff=0.5, max_eff=1.0, logscale=True)
    fg.savefig(os.path.join('plots', 'roc' + run_name + '.pdf'))
    fg = ROC_plotter(discs_bc, title=r'Impact Parameter Taggers',
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=10**2)
    fg.savefig(os.path.join('plots', 'roc' + run_name + '_bc.pdf'))
def performance(yhat, y, mv2c10, iptagger, extratitle=''):
    '''
    Compare DL1 with MV2c10 on b-vs-light ('bl') and b-vs-c ('bc') ROC
    curves. For each comparison the curves are pickled and the plot is
    saved; the rejections at the point of each curve whose efficiency is
    closest to 0.7 are returned.

    Output files (written to the current directory), for <tag> in bl, bc:
        ROC_<iptagger>_<MODEL_NAME>_genprova_<tag>.pkl
        ROC_<iptagger>_<MODEL_NAME>_<extratitle>_genprova_<tag>.pdf

    Args:
    -----
        yhat: predicted class probabilities, indexed as yhat[:, class];
              columns 0, 1 and 2 are used
        y: flavor labels (compared with 0, 4 and 5); must support
           boolean-mask indexing
        mv2c10: the MV2c10 discriminant, aligned with y
        iptagger: name of the IP tagger, used in curve labels and file names
        extratitle: appended to the plot title, used in the pdf file names
                    and as the key of the returned dictionary

    Returns:
    --------
        {extratitle: {'DL1_70_bl': ..., 'DL1_70_bc': ...,
                      'MV2_70_bl': ..., 'MV2_70_bc': ...}}
        where each value is the 'rejection' entry of the corresponding curve
        at the index whose 'efficiency' is nearest to 0.7

    Relies on the module-level names MODEL_NAME, add_curve, calculate_roc,
    ROC_plotter, cPickle and plt.
    '''
    # -- Find flavors after applying cuts:
    bl_sel = (y == 5) | (y == 0)
    bc_sel = (y == 5) | (y == 4)

    def find_nearest(array, value):
        # -- index of the element of array closest to value
        return (np.abs(array - value)).argmin()

    dl1_name = r'DL1' + iptagger
    prefix = 'ROC_' + iptagger + '_' + MODEL_NAME
    working_points = {}

    # -- (file tag, selection, yhat column in the denominator, ymax of plot)
    comparisons = (
        ('bl', bl_sel, 0, 10000),
        ('bc', bc_sel, 1, 100),
    )
    for tag, sel, denom_col, ymax in comparisons:
        # -- log-ratio discriminant on the selection, computed once
        disc = np.log(yhat[:, 2] / yhat[:, denom_col])[sel]
        fin1 = np.isfinite(disc)

        curves = {}
        add_curve(dl1_name, 'green',
                  calculate_roc(y[sel][fin1] == 5, disc[fin1]),
                  curves)
        add_curve(r'MV2c10', 'red',
                  calculate_roc(y[sel] == 5, mv2c10[sel]),
                  curves)

        # -- close the pickle file deterministically instead of leaking it
        with open(prefix + '_genprova_' + tag + '.pkl', 'wb') as pkl_file:
            cPickle.dump(curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

        fg = ROC_plotter(curves, title=r'DL1 vs MV2c10 ' + extratitle,
                         min_eff=0.5, max_eff=1.0, logscale=True, ymax=ymax)
        fg.savefig(prefix + '_' + extratitle + '_genprova_' + tag + '.pdf')
        # -- close every figure; previously the b VS. light one was left open
        plt.close(fg)

        # -- rejection at the point whose efficiency is nearest to 0.7
        for key, name in (('DL1_70_', dl1_name), ('MV2_70_', r'MV2c10')):
            curve = curves[name]
            working_points[key + tag] = curve['rejection'][
                find_nearest(curve['efficiency'], 0.7)]

    return {extratitle: working_points}
def plot_ROC(y_test, yhat, ip3d, run_name):
    '''
    Build b-vs-light and b-vs-c ROC curves for IP3D and for the RNNIP
    discriminant, pickle the RNNIP curves and save both plots.

    Output files:
        roc_pickles/<run_name>_bl.pkl, roc_pickles/<run_name>_bc.pkl
        plots/roc<run_name>.pdf, plots/roc<run_name>_bc.pdf

    Args:
    -----
        y_test: the truth labels for the test set; each entry is reduced to a
                class index with np.argmax (presumably one-hot rows -- TODO
                confirm)
        yhat: the predicted probabilities of each class in the test set,
              indexed as yhat[:, class]; columns 0, 1 and 2 are used
        ip3d: object exposing jet_ip3d_pb, jet_ip3d_pu and jet_ip3d_pc whose
              ratios support `.values` and boolean-mask indexing (presumably
              a pandas DataFrame -- TODO confirm)
        run_name: string used in the names of the output files

    Relies on the module-level names logging, os, cPickle and safe_mkdir.
    '''
    from viz import calculate_roc, ROC_plotter, add_curve
    logger = logging.getLogger("plot ROC")

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])
    # -- for b VS. light
    bl_sel = (y == 0) | (y == 2)
    # -- for b VS. c
    bc_sel = (y == 1) | (y == 2)

    # -- add ROC curves: b VS. light
    # -- (each log-ratio is computed once; entries that are not selected or
    # --  whose log-ratio is not finite are masked out)
    discs = {}
    llr = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    mask = bl_sel & np.isfinite(llr.values)
    add_curve(r'IP3D', 'black', calculate_roc(y[mask] == 2, llr[mask]), discs)

    llr = np.log(yhat[:, 2] / yhat[:, 0])
    mask = bl_sel & np.isfinite(llr)
    add_curve('RNNIP', 'blue', calculate_roc(y[mask] == 2, llr[mask]), discs)

    # -- add ROC curves: b VS. c
    discs_bc = {}
    llr = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pc)
    mask = bc_sel & np.isfinite(llr.values)
    add_curve(r'IP3D', 'black', calculate_roc(y[mask] == 2, llr[mask]),
              discs_bc)

    llr = np.log(yhat[:, 2] / yhat[:, 1])
    mask = bc_sel & np.isfinite(llr)
    add_curve('RNNIP', 'blue', calculate_roc(y[mask] == 2, llr[mask]),
              discs_bc)

    logger.info('Pickling ROC curves')
    safe_mkdir('roc_pickles')
    # -- close each pickle file deterministically instead of leaking it
    for suffix, curves in (('_bl.pkl', discs), ('_bc.pkl', discs_bc)):
        pkl_path = os.path.join('roc_pickles', run_name + suffix)
        with open(pkl_path, 'wb') as pkl_file:
            cPickle.dump(curves['RNNIP'], pkl_file, cPickle.HIGHEST_PROTOCOL)

    logger.info('Plotting')
    safe_mkdir('plots')
    fg = ROC_plotter(
        discs,
        title=r'Impact Parameter Taggers',
        min_eff=0.5, max_eff=1.0,
        logscale=True
    )
    fg.savefig(os.path.join('plots', 'roc' + run_name + '.pdf'))
    fg = ROC_plotter(
        discs_bc,
        title=r'Impact Parameter Taggers',
        min_eff=0.5, max_eff=1.0,
        logscale=True, ymax=10**2
    )
    fg.savefig(os.path.join('plots', 'roc' + run_name + '_bc.pdf'))