Example #1
0
def plot_ROC(y_test, yhat, ip3d, MODEL_FILE):
    '''
    Plot b-vs-light ROC curves for IP3D and for the model discriminant,
    and pickle the model's curve to MODEL_FILE + '.pkl'.

    Args:
    -----
        y_test: the truth labels for the test set, one entry per jet; each
            entry is reduced to a class index with np.argmax
            (0 = light, 1 = c, 2 = b)
        yhat: the predicted probabilities of each class in the test set,
            indexed as yhat[:, class]
        ip3d: object exposing jet_ip3d_pb and jet_ip3d_pu whose ratio has a
            .values attribute (presumably a pandas DataFrame aligned
            row-by-row with y_test -- TODO confirm)
        MODEL_FILE: string used as the name of the model curve and as the
            stem of the output pickle file

    Returns:
    --------
        a mpl.figure (the object returned by viz.ROC_plotter)
    '''
    from viz import calculate_roc, ROC_plotter, add_curve
    import cPickle

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])

    # -- for b VS. light
    bl_sel = (y == 0) | (y == 2)

    # -- add ROC curves
    discs = {}

    # Compute each log-likelihood ratio once; non-finite entries (log of 0,
    # division by 0) are dropped before building the curve.
    ip3d_llr = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    ip3d_keep = bl_sel & np.isfinite(ip3d_llr.values)
    add_curve(r'IP3D', 'black',
              calculate_roc(y[ip3d_keep] == 2, ip3d_llr[ip3d_keep]),
              discs)

    model_llr = np.log(yhat[:, 2] / yhat[:, 0])
    model_keep = bl_sel & np.isfinite(model_llr)
    add_curve(MODEL_FILE, 'blue',
              calculate_roc(y[model_keep] == 2, model_llr[model_keep]),
              discs)

    # Parenthesised single-argument print behaves the same with or without
    # print_function.
    print('Pickling ROC curves')
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(MODEL_FILE + '.pkl', 'wb') as pkl_file:
        cPickle.dump(discs[MODEL_FILE], pkl_file, cPickle.HIGHEST_PROTOCOL)

    print('Plotting')
    fg = ROC_plotter(discs,
                     title=r'Impact Parameter Taggers',
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True)

    return fg
def performance(yhat, test, iptagger):
    '''
    Build, pickle and plot b-vs-light and b-vs-c ROC curves comparing the
    network discriminant with MV2c10.

    Args:
    -----
        yhat: the predicted class probabilities, indexed as yhat[:, class];
            columns 0, 1 and 2 are used (presumably light, c and b --
            TODO confirm)
        test: mapping with a 'y' entry (truth flavor labels, of which 5, 4
            and 0 are selected) and an 'mv2c10' entry (MV2c10 scores)
        iptagger: string appended to the curve label and used in the output
            file names

    Writes 'ROC_MV2c10+<iptagger>_<model_id>_{bl,bc}.pkl' and the matching
    '.pdf' files; model_id is read from module scope. Returns None.
    '''
    truth = test['y']

    # -- Find flavors after applying cuts:
    bl_sel = (truth == 5) | (truth == 0)
    bc_sel = (truth == 5) | (truth == 4)

    # -- b vs. light: compute the discriminant once and drop non-finite
    #    values (log of 0 / division by 0) before building the curve
    bl_disc = np.log(yhat[:, 2] / yhat[:, 0])[bl_sel]
    bl_finite = np.isfinite(bl_disc)
    bl_curves = {}
    add_curve(r'MV2c10+' + iptagger, 'green',
              calculate_roc(truth[bl_sel][bl_finite] == 5,
                            bl_disc[bl_finite]),
              bl_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(truth[bl_sel] == 5, test['mv2c10'][bl_sel]),
              bl_curves)
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('ROC_MV2c10+' + iptagger + '_' + model_id + '_bl.pkl',
              'wb') as pkl_file:
        cPickle.dump(bl_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bl_curves, title=r'DL1 + IP Taggers Combination',
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=10000)
    fg.savefig('ROC_MV2c10+' + iptagger + '_' + model_id + '_bl.pdf')

    # -- b vs. c
    bc_disc = np.log(yhat[:, 2] / yhat[:, 1])[bc_sel]
    bc_finite = np.isfinite(bc_disc)
    bc_curves = {}
    add_curve(r'MV2c10+' + iptagger, 'green',
              calculate_roc(truth[bc_sel][bc_finite] == 5,
                            bc_disc[bc_finite]),
              bc_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(truth[bc_sel] == 5, test['mv2c10'][bc_sel]),
              bc_curves)
    with open('ROC_MV2c10+' + iptagger + '_' + model_id + '_bc.pkl',
              'wb') as pkl_file:
        cPickle.dump(bc_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bc_curves, title=r'DL1 + IP Taggers Combination',
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=100)
    fg.savefig('ROC_MV2c10+' + iptagger + '_' + model_id + '_bc.pdf')
Example #3
0
def plot_ROC(y_test, yhat, ip3d, MODEL_FILE):
    '''
    Plot b-vs-light ROC curves for IP3D and for the model discriminant,
    and pickle the model's curve to MODEL_FILE + '.pkl'.

    Args:
    -----
        y_test: the truth labels for the test set, one entry per jet; each
            entry is reduced to a class index with np.argmax
            (0 = light, 1 = c, 2 = b)
        yhat: the predicted probabilities of each class in the test set,
            indexed as yhat[:, class]
        ip3d: object exposing jet_ip3d_pb and jet_ip3d_pu whose ratio has a
            .values attribute (presumably a pandas DataFrame aligned
            row-by-row with y_test -- TODO confirm)
        MODEL_FILE: string used as the name of the model curve and as the
            stem of the output pickle file

    Returns:
    --------
        a mpl.figure (the object returned by viz.ROC_plotter)
    '''

    from viz import calculate_roc, ROC_plotter, add_curve

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])

    # -- for b VS. light
    bl_sel = (y == 0) | (y == 2)

    # -- add ROC curves
    discs = {}

    # Each log-likelihood ratio is evaluated once and reused for both the
    # finiteness mask and the discriminant values.
    llr_ip3d = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    sel_ip3d = bl_sel & np.isfinite(llr_ip3d.values)
    add_curve(r'IP3D', 'black',
              calculate_roc(y[sel_ip3d] == 2, llr_ip3d[sel_ip3d]),
              discs)

    llr_model = np.log(yhat[:, 2] / yhat[:, 0])
    sel_model = bl_sel & np.isfinite(llr_model)
    add_curve(MODEL_FILE, 'blue',
              calculate_roc(y[sel_model] == 2, llr_model[sel_model]),
              discs)

    # Parenthesised single-argument print behaves the same with or without
    # print_function.
    print('Pickling ROC curves')

    # Close the output file deterministically instead of leaking the handle.
    with open(MODEL_FILE + '.pkl', 'wb') as pkl_file:
        cPickle.dump(discs[MODEL_FILE], pkl_file, cPickle.HIGHEST_PROTOCOL)

    print('Plotting')
    fg = ROC_plotter(discs, title=r'Impact Parameter Taggers',
                     min_eff=0.5, max_eff=1.0, logscale=True)

    return fg
def plot_roc(yhat, data, model_name, cutflow_eff=None):
    '''
    Plot one signal-vs-background ROC curve per non-background class and
    pickle all of the curves together.

    Args:
        yhat: an ndarray of the probability of each event for each class,
            indexed as yhat[event, class]
        data: dictionary containing 'y_test' (class index per event),
            'w_test' (per-event weights) and 'LabelEncoder' (object with
            classes_ and inverse_transform, presumably a scikit-learn
            LabelEncoder -- TODO confirm)
        model_name: string used in the output file names
        cutflow_eff: optional sequence of cutflow efficiencies indexed by
            class index; defaults to the values previously hard-coded here
    Returns:
        None. Side effects:
        plot: one 'roc_<class>_<model_name>.pdf' per non-background class
        pickle file: '<model_name>.pkl', a dictionary mapping each class
            name to its 'DNN' curve
    '''
    if cutflow_eff is None:
        # -- hardcoded in from cutflow!! extract them instead
        cutflow_eff = [0.0699191919192, 0.0754639175258, 0.08439,
                       0.0921212121212, 0.110275510204, 0.00484432269559]

    y_test = data['y_test']
    w_test = data['w_test']
    le = data['LabelEncoder']
    bkg_col = np.argwhere(le.classes_ == 'bkg')[0][0]

    pkl_dict = {}
    classes = np.unique(y_test)
    for k in classes[classes != bkg_col]:
        # Pass a 1-element list: recent scikit-learn rejects scalar input.
        k_string = le.inverse_transform([k])[0]
        selection = (y_test == k) | (y_test == bkg_col)

        # Log-likelihood ratio of class k vs. background, computed once;
        # non-finite values (log of 0 / division by 0) are dropped.
        yhat_sel = yhat[selection]
        llr = np.log(yhat_sel[:, k] / yhat_sel[:, bkg_col])
        finite = np.isfinite(llr)

        curves = {}
        add_curve('DNN', 'black',
                  calculate_roc(
                      y_test[selection][finite] == k,
                      llr[finite],
                      weights=w_test[selection][finite]),
                  curves)
        # Every iteration produces a curve named 'DNN'; key it by class name
        # so earlier classes are not overwritten in the pickle.
        pkl_dict[k_string] = curves['DNN']

        fig = ROC_plotter(curves,
                          title=k_string + r' vs. Sherpa $\gamma \gamma$ Background',
                          min_eff=0.05, max_eff=1.0, ymax=500,
                          logscale=False)
        plt.scatter(cutflow_eff[k], 1. / cutflow_eff[bkg_col],
                    label='Cutflow ' + k_string)
        plt.legend()
        # NOTE(review): set after the figure is drawn, so it presumably only
        # affects text created afterwards -- confirm this is intended.
        matplotlib.rcParams.update({'font.size': 16})
        fig.savefig('roc_' + k_string + '_' + model_name + '.pdf')
        # Release the figure; otherwise one stays open per class.
        plt.close(fig)

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(model_name + '.pkl', 'wb') as pkl_file:
        cPickle.dump(pkl_dict, pkl_file)
Example #5
0
def performance(yhat, test, iptagger):
    '''
    Build, pickle and plot b-vs-light and b-vs-c ROC curves comparing the
    network discriminant with MV2c10.

    Args:
    -----
        yhat: the predicted class probabilities, indexed as yhat[:, class];
            columns 0, 1 and 2 are used (presumably light, c and b --
            TODO confirm)
        test: mapping with a 'y' entry (truth flavor labels, of which 5, 4
            and 0 are selected) and an 'mv2c10' entry (MV2c10 scores)
        iptagger: string appended to the curve label and used in the output
            file names

    Writes 'ROC_MV2c10+<iptagger>_<model_id>_{bl,bc}.pkl' and the matching
    '.pdf' files; model_id is read from module scope. Returns None.
    '''
    flavor = test['y']
    file_stem = 'ROC_MV2c10+' + iptagger + '_' + model_id

    # -- Find flavors after applying cuts:
    bl_sel = (flavor == 5) | (flavor == 0)
    bc_sel = (flavor == 5) | (flavor == 4)

    # -- b vs. light. The discriminant is evaluated once; entries where the
    #    log ratio is not finite are excluded from the network curve only.
    disc_bl = np.log(yhat[:, 2] / yhat[:, 0])[bl_sel]
    ok_bl = np.isfinite(disc_bl)
    bl_curves = {}
    add_curve(
        r'MV2c10+' + iptagger, 'green',
        calculate_roc(flavor[bl_sel][ok_bl] == 5, disc_bl[ok_bl]),
        bl_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(flavor[bl_sel] == 5, test['mv2c10'][bl_sel]),
              bl_curves)
    # Close the pickle file deterministically instead of leaking the handle.
    with open(file_stem + '_bl.pkl', 'wb') as pkl_file:
        cPickle.dump(bl_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bl_curves,
                     title=r'DL1 + IP Taggers Combination',
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True,
                     ymax=10000)
    fg.savefig(file_stem + '_bl.pdf')

    # -- b vs. c
    disc_bc = np.log(yhat[:, 2] / yhat[:, 1])[bc_sel]
    ok_bc = np.isfinite(disc_bc)
    bc_curves = {}
    add_curve(
        r'MV2c10+' + iptagger, 'green',
        calculate_roc(flavor[bc_sel][ok_bc] == 5, disc_bc[ok_bc]),
        bc_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(flavor[bc_sel] == 5, test['mv2c10'][bc_sel]),
              bc_curves)
    with open(file_stem + '_bc.pkl', 'wb') as pkl_file:
        cPickle.dump(bc_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bc_curves,
                     title=r'DL1 + IP Taggers Combination',
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True,
                     ymax=100)
    fg.savefig(file_stem + '_bc.pdf')
def performance(yhat, y, mv2c10, iptagger, extratitle=''):
    '''
    Build, pickle and plot b-vs-light and b-vs-c ROC curves for DL1 and
    MV2c10, and report the rejections nearest to 70% efficiency.

    Args:
    -----
        yhat: the predicted class probabilities, indexed as yhat[:, class];
            columns 0, 1 and 2 are used (presumably light, c and b --
            TODO confirm)
        y: truth flavor labels, of which 5, 4 and 0 are selected
        mv2c10: MV2c10 scores, indexable with the same masks as y
        iptagger: string appended to the 'DL1' curve label and used in the
            output file names
        extratitle: string appended to the plot titles, used in the .pdf
            file names and as the key of the returned dictionary

    Returns:
    --------
        {extratitle: {'DL1_70_bl', 'DL1_70_bc', 'MV2_70_bl', 'MV2_70_bc'}}
        where each value is the 'rejection' entry of the corresponding
        curve at the point whose 'efficiency' is closest to 0.7

    MODEL_NAME is read from module scope for the output file names.
    NOTE(review): the .pkl names do not include extratitle, so calls that
    differ only in extratitle overwrite each other's pickles -- confirm
    this is intended.
    '''
    dl1_name = r'DL1' + iptagger

    # -- Find flavors after applying cuts:
    bl_sel = (y == 5) | (y == 0)
    bc_sel = (y == 5) | (y == 4)

    # -- b vs. light: compute the discriminant once and drop non-finite
    #    values (log of 0 / division by 0) from the DL1 curve
    bl_disc = np.log(yhat[:, 2] / yhat[:, 0])[bl_sel]
    bl_finite = np.isfinite(bl_disc)
    bl_curves = {}
    add_curve(dl1_name, 'green',
              calculate_roc(y[bl_sel][bl_finite] == 5, bl_disc[bl_finite]),
              bl_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(y[bl_sel] == 5, mv2c10[bl_sel]),
              bl_curves)
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('ROC_' + iptagger + '_' + MODEL_NAME + '_genprova_bl.pkl',
              'wb') as pkl_file:
        cPickle.dump(bl_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bl_curves, title=r'DL1 vs MV2c10 ' + extratitle,
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=10000)
    fg.savefig('ROC_' + iptagger + '_' + MODEL_NAME + '_' + extratitle +
               '_genprova_bl.pdf')
    # Close this figure too; previously only the b-vs-c figure was released.
    plt.close(fg)

    # -- b vs. c
    bc_disc = np.log(yhat[:, 2] / yhat[:, 1])[bc_sel]
    bc_finite = np.isfinite(bc_disc)
    bc_curves = {}
    add_curve(dl1_name, 'green',
              calculate_roc(y[bc_sel][bc_finite] == 5, bc_disc[bc_finite]),
              bc_curves)
    add_curve(r'MV2c10', 'red',
              calculate_roc(y[bc_sel] == 5, mv2c10[bc_sel]),
              bc_curves)
    with open('ROC_' + iptagger + '_' + MODEL_NAME + '_genprova_bc.pkl',
              'wb') as pkl_file:
        cPickle.dump(bc_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bc_curves, title=r'DL1 vs MV2c10 ' + extratitle,
                     min_eff=0.5, max_eff=1.0, logscale=True, ymax=100)
    fg.savefig('ROC_' + iptagger + '_' + MODEL_NAME + '_' + extratitle +
               '_genprova_bc.pdf')
    plt.close(fg)

    def find_nearest(array, value):
        # Index of the element of array closest to value.
        return (np.abs(array - value)).argmin()

    def rejection_at_70(curve):
        # Rejection at the working point nearest to 70% efficiency.
        return curve['rejection'][find_nearest(curve['efficiency'], 0.7)]

    return {extratitle:
        {
            'DL1_70_bl': rejection_at_70(bl_curves[dl1_name]),
            'DL1_70_bc': rejection_at_70(bc_curves[dl1_name]),
            'MV2_70_bl': rejection_at_70(bl_curves[r'MV2c10']),
            'MV2_70_bc': rejection_at_70(bc_curves[r'MV2c10'])
        }
    }
Example #7
0
def plot_ROC(y_test, yhat, ip3d, run_name):
    '''
    Build b-vs-light and b-vs-c ROC curves for IP3D and RNNIP, pickle the
    RNNIP curves and save both plots.

    Args:
    -----
        y_test: the truth labels for the test set, one entry per jet; each
            entry is reduced to a class index with np.argmax
            (0 = light, 1 = c, 2 = b)
        yhat: the predicted probabilities of each class in the test set,
            indexed as yhat[:, class]
        ip3d: object exposing jet_ip3d_pb, jet_ip3d_pc and jet_ip3d_pu whose
            ratios have a .values attribute (presumably a pandas DataFrame
            aligned row-by-row with y_test -- TODO confirm)
        run_name: string used in the output file names

    Writes 'roc_pickles/<run_name>_{bl,bc}.pkl' and
    'plots/roc<run_name>{,_bc}.pdf'. Returns None.
    '''
    from viz import calculate_roc, ROC_plotter, add_curve
    logger = logging.getLogger("plot ROC")

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])

    # -- for b VS. light
    bl_sel = (y == 0) | (y == 2)
    # -- for b VS. c
    bc_sel = (y == 1) | (y == 2)

    # -- add ROC curves
    # Each log-likelihood ratio is computed once; jets where it is not
    # finite (log of 0 / division by 0) are dropped from that curve.
    discs = {}
    ip3d_bl = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    keep = bl_sel & np.isfinite(ip3d_bl.values)
    add_curve(r'IP3D', 'black',
              calculate_roc(y[keep] == 2, ip3d_bl[keep]),
              discs)
    rnn_bl = np.log(yhat[:, 2] / yhat[:, 0])
    keep = bl_sel & np.isfinite(rnn_bl)
    add_curve('RNNIP', 'blue',
              calculate_roc(y[keep] == 2, rnn_bl[keep]),
              discs)

    discs_bc = {}
    ip3d_bc = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pc)
    keep = bc_sel & np.isfinite(ip3d_bc.values)
    add_curve(r'IP3D', 'black',
              calculate_roc(y[keep] == 2, ip3d_bc[keep]),
              discs_bc)
    rnn_bc = np.log(yhat[:, 2] / yhat[:, 1])
    keep = bc_sel & np.isfinite(rnn_bc)
    add_curve('RNNIP', 'blue',
              calculate_roc(y[keep] == 2, rnn_bc[keep]),
              discs_bc)

    logger.info('Pickling ROC curves')
    safe_mkdir('roc_pickles')
    # Context managers guarantee each pickle is flushed and its handle closed.
    with open(os.path.join('roc_pickles', run_name + '_bl.pkl'),
              'wb') as pkl_file:
        cPickle.dump(discs['RNNIP'], pkl_file, cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join('roc_pickles', run_name + '_bc.pkl'),
              'wb') as pkl_file:
        cPickle.dump(discs_bc['RNNIP'], pkl_file, cPickle.HIGHEST_PROTOCOL)

    logger.info('Plotting')
    safe_mkdir('plots')
    fg = ROC_plotter(discs,
                     title=r'Impact Parameter Taggers',
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True)
    fg.savefig(os.path.join('plots', 'roc' + run_name + '.pdf'))
    fg = ROC_plotter(discs_bc,
                     title=r'Impact Parameter Taggers',
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True,
                     ymax=10**2)
    fg.savefig(os.path.join('plots', 'roc' + run_name + '_bc.pdf'))
Example #8
0
def performance(yhat, y, mv2c10, iptagger, extratitle=''):
    '''
    Build, pickle and plot b-vs-light and b-vs-c ROC curves for DL1 and
    MV2c10, and report the rejections nearest to 70% efficiency.

    Args:
    -----
        yhat: the predicted class probabilities, indexed as yhat[:, class];
            columns 0, 1 and 2 are used (presumably light, c and b --
            TODO confirm)
        y: truth flavor labels, of which 5, 4 and 0 are selected
        mv2c10: MV2c10 scores, indexable with the same masks as y
        iptagger: string appended to the 'DL1' curve label and used in the
            output file names
        extratitle: string appended to the plot titles, used in the .pdf
            file names and as the key of the returned dictionary

    Returns:
    --------
        {extratitle: {'DL1_70_bl', 'DL1_70_bc', 'MV2_70_bl', 'MV2_70_bc'}}
        where each value is the 'rejection' entry of the corresponding
        curve at the point whose 'efficiency' is closest to 0.7

    MODEL_NAME is read from module scope for the output file names.
    NOTE(review): the .pkl names do not include extratitle, so calls that
    differ only in extratitle overwrite each other's pickles -- confirm
    this is intended.
    '''
    dl1_label = r'DL1' + iptagger
    pkl_stem = 'ROC_' + iptagger + '_' + MODEL_NAME
    pdf_stem = 'ROC_' + iptagger + '_' + MODEL_NAME + '_' + extratitle

    # -- Find flavors after applying cuts:
    bl_sel = (y == 5) | (y == 0)
    bc_sel = (y == 5) | (y == 4)

    # -- b vs. light. The discriminant is evaluated once; entries where the
    #    log ratio is not finite are excluded from the DL1 curve only.
    disc_bl = np.log(yhat[:, 2] / yhat[:, 0])[bl_sel]
    ok_bl = np.isfinite(disc_bl)
    bl_curves = {}
    add_curve(
        dl1_label, 'green',
        calculate_roc(y[bl_sel][ok_bl] == 5, disc_bl[ok_bl]),
        bl_curves)
    add_curve(r'MV2c10', 'red', calculate_roc(y[bl_sel] == 5, mv2c10[bl_sel]),
              bl_curves)
    # Close the pickle file deterministically instead of leaking the handle.
    with open(pkl_stem + '_genprova_bl.pkl', 'wb') as pkl_file:
        cPickle.dump(bl_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bl_curves,
                     title=r'DL1 vs MV2c10 ' + extratitle,
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True,
                     ymax=10000)
    fg.savefig(pdf_stem + '_genprova_bl.pdf')
    # Close this figure too; previously only the b-vs-c figure was released.
    plt.close(fg)

    # -- b vs. c
    disc_bc = np.log(yhat[:, 2] / yhat[:, 1])[bc_sel]
    ok_bc = np.isfinite(disc_bc)
    bc_curves = {}
    add_curve(
        dl1_label, 'green',
        calculate_roc(y[bc_sel][ok_bc] == 5, disc_bc[ok_bc]),
        bc_curves)
    add_curve(r'MV2c10', 'red', calculate_roc(y[bc_sel] == 5, mv2c10[bc_sel]),
              bc_curves)
    with open(pkl_stem + '_genprova_bc.pkl', 'wb') as pkl_file:
        cPickle.dump(bc_curves, pkl_file, cPickle.HIGHEST_PROTOCOL)

    fg = ROC_plotter(bc_curves,
                     title=r'DL1 vs MV2c10 ' + extratitle,
                     min_eff=0.5,
                     max_eff=1.0,
                     logscale=True,
                     ymax=100)
    fg.savefig(pdf_stem + '_genprova_bc.pdf')
    plt.close(fg)

    def find_nearest(array, value):
        # Index of the element of array closest to value.
        return (np.abs(array - value)).argmin()

    def rejection_at_70(curve):
        # Rejection at the working point nearest to 70% efficiency.
        return curve['rejection'][find_nearest(curve['efficiency'], 0.7)]

    return {
        extratitle: {
            'DL1_70_bl': rejection_at_70(bl_curves[dl1_label]),
            'DL1_70_bc': rejection_at_70(bc_curves[dl1_label]),
            'MV2_70_bl': rejection_at_70(bl_curves[r'MV2c10']),
            'MV2_70_bc': rejection_at_70(bc_curves[r'MV2c10'])
        }
    }
Example #9
0
def plot_ROC(y_test, yhat, ip3d, run_name):
    '''
    Build b-vs-light and b-vs-c ROC curves for IP3D and RNNIP, pickle the
    RNNIP curves and save both plots.

    Args:
    -----
        y_test: the truth labels for the test set, one entry per jet; each
            entry is reduced to a class index with np.argmax
            (0 = light, 1 = c, 2 = b)
        yhat: the predicted probabilities of each class in the test set,
            indexed as yhat[:, class]
        ip3d: object exposing jet_ip3d_pb, jet_ip3d_pc and jet_ip3d_pu whose
            ratios have a .values attribute (presumably a pandas DataFrame
            aligned row-by-row with y_test -- TODO confirm)
        run_name: string used in the output file names

    Writes 'roc_pickles/<run_name>_{bl,bc}.pkl' and
    'plots/roc<run_name>{,_bc}.pdf'. Returns None.
    '''
    from viz import calculate_roc, ROC_plotter, add_curve
    logger = logging.getLogger("plot ROC")

    # -- bring classes back to usual format: [0,2,3,0,1,2,0,2,2,...]
    y = np.array([np.argmax(ev) for ev in y_test])

    # -- for b VS. light
    bl_sel = (y == 0) | (y == 2)
    # -- for b VS. c
    bc_sel = (y == 1) | (y == 2)

    # -- add ROC curves
    # Every log-likelihood ratio is evaluated a single time and reused for
    # both the finiteness mask and the discriminant values.
    discs = {}
    llr_ip3d_bl = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pu)
    mask = bl_sel & np.isfinite(llr_ip3d_bl.values)
    add_curve(r'IP3D', 'black',
        calculate_roc(y[mask] == 2, llr_ip3d_bl[mask]),
        discs
    )
    llr_rnn_bl = np.log(yhat[:, 2] / yhat[:, 0])
    mask = bl_sel & np.isfinite(llr_rnn_bl)
    add_curve('RNNIP', 'blue',
        calculate_roc(y[mask] == 2, llr_rnn_bl[mask]),
        discs
    )

    discs_bc = {}
    llr_ip3d_bc = np.log(ip3d.jet_ip3d_pb / ip3d.jet_ip3d_pc)
    mask = bc_sel & np.isfinite(llr_ip3d_bc.values)
    add_curve(r'IP3D', 'black',
        calculate_roc(y[mask] == 2, llr_ip3d_bc[mask]),
        discs_bc
    )
    llr_rnn_bc = np.log(yhat[:, 2] / yhat[:, 1])
    mask = bc_sel & np.isfinite(llr_rnn_bc)
    add_curve('RNNIP', 'blue',
        calculate_roc(y[mask] == 2, llr_rnn_bc[mask]),
        discs_bc
    )

    logger.info('Pickling ROC curves')
    safe_mkdir('roc_pickles')
    # Close each output file deterministically instead of leaking handles.
    with open(os.path.join('roc_pickles', run_name + '_bl.pkl'), 'wb') as pkl_file:
        cPickle.dump(discs['RNNIP'], pkl_file, cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join('roc_pickles', run_name + '_bc.pkl'), 'wb') as pkl_file:
        cPickle.dump(discs_bc['RNNIP'], pkl_file, cPickle.HIGHEST_PROTOCOL)

    logger.info('Plotting')
    safe_mkdir('plots')
    fg = ROC_plotter(
        discs, title=r'Impact Parameter Taggers',
        min_eff=0.5, max_eff=1.0, logscale=True
    )
    fg.savefig(os.path.join('plots', 'roc' + run_name + '.pdf'))
    fg = ROC_plotter(
        discs_bc, title=r'Impact Parameter Taggers',
        min_eff=0.5, max_eff=1.0, logscale=True, ymax=10**2
    )
    fg.savefig(os.path.join('plots', 'roc' + run_name + '_bc.pdf'))