# Evaluate the unbiased model on its feature columns and keep column 1 of the
# predict_proba output as the per-row score.
unbiased_out = unbiased_model.predict_proba(data[unbiased_features].values)
unbiased_out = unbiased_out[:, 1]
# NaN scores are replaced by a -999 sentinel (happens rarely, but happens).
unbiased_out[np.isnan(unbiased_out)] = -999

# Attach both model outputs to the frame and initialise the two
# *_match_george flag columns to False for every row.
data['biased_out'] = biased_out
data['unbiased_out'] = unbiased_out
data['gen_match_george'] = False
data['trk_match_george'] = False

# Dump the selected columns (plus both scores) for rows flagged is_e.
to_dump.extend(['unbiased_out', 'biased_out'])
limited = data[data.is_e][to_dump]
limited.to_csv(
    '/afs/cern.ch/work/m/mverzett/public/george_sync_10_2.csv', index=False)

# Load the 'debug' sample with every field except 'trk_dxy_sig'.
raw = pd.DataFrame(
    dsets.get_data_sync('debug', list(fields - {'trk_dxy_sig'})))
#raw = raw[raw.is_e]

# (lumi, evt) pairs present in the dumped selection and in the raw sample.
passed = set(zip(limited.lumi, limited.evt))
raw_passed = set(zip(raw.lumi, raw.evt))

wp = {
    'biased': {
        "L": -0.48,
        "M": 0.76,
        "T": 1.83,
    },
    'unbiased': {
        "L": 1.03,
        "M": 1.75,
        "T": 2.61,
    dataset = args.dataset

# Output directories for models and plots, both under $CMSSW_BASE and keyed
# by `tag`; created on first use.
mods = '%s/src/LowPtElectrons/LowPtElectrons/macros/models/%s/' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(mods):
    os.makedirs(mods)

plots = '%s/src/LowPtElectrons/LowPtElectrons/macros/plots/%s/' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(plots):
    os.makedirs(plots)

# A parenthesised single-argument print behaves identically under Python 2
# and also parses under Python 3.
print('Getting dataset "{:s}"...'.format(dataset))
data = pd.DataFrame(
    get_data_sync(dataset, [
        'gen_pt', 'gen_eta', 'trk_pt', 'trk_eta', 'evt', 'is_e',
        'is_e_not_matched', 'is_other', 'is_egamma'
    ]))
print('...Done')
data = data[np.invert(data.is_egamma)]  # remove EGamma electrons
# remove non-matched electrons; copy so that the column assignments below
# write to an independent frame rather than to a slice of the previous one
data = data[np.invert(data.is_e_not_matched)].copy()
# Item assignment is required to create new columns: `data.gsf_pt = ...`
# only sets a Python attribute on the DataFrame object, which creates no
# column and does not survive the filtering step below.
data['gsf_pt'] = data.trk_pt  #@@
data['gsf_eta'] = data.trk_eta  #@@
#remove things that do not yield tracks
data = data[(data.trk_pt > 0) & (np.abs(data.trk_eta) < 2.4) &
            (data.trk_pt < 15)].copy()
data['log_trkpt'] = np.log10(data.trk_pt)

# original_weight = HistWeighter('../data/fakesWeights.txt')
# Every row gets unit weight; the histogram-based weighting is disabled:
#np.invert(data.is_e)*original_weight.get_weight(data.log_trkpt, data.trk_eta)+data.is_e
data['original_weight'] = 1.
Exemple #3
0
# Font and text rendering settings (presumably matplotlib's `rc`; the import
# is outside this excerpt): sans-serif family with Helvetica, and text
# rendering with usetex enabled.
rc('font', family='sans-serif', **{'sans-serif': ['Helvetica']})
rc('text', usetex=True)
from datasets import get_data, tag, apply_weight, get_data_sync
import os

# Per-tag output directory for models, created if it does not exist yet.
mods = '%s/src/LowPtElectrons/LowPtElectrons/macros/models/%s' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(mods):
    os.makedirs(mods)

# Per-tag output directory for plots, created if it does not exist yet.
plots = '%s/src/LowPtElectrons/LowPtElectrons/macros/plots/%s' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(plots):
    os.makedirs(plots)

# Load only the columns needed below into a DataFrame.
print('Getting dataset "{:s}"...'.format(dataset))
data = pd.DataFrame(
    get_data_sync(
        dataset,
        ['trk_pt', 'trk_eta', 'is_e', 'is_e_not_matched', 'is_other'],
    )
)
print('...Done')

# Drop rows flagged is_e_not_matched, then keep tracks with
# 0 < trk_pt < 15 and |trk_eta| < 2.4, and add log10(trk_pt).
data = data[np.invert(data['is_e_not_matched'])]
data = data[
    (data['trk_pt'] > 0)
    & (np.abs(data['trk_eta']) < 2.4)
    & (data['trk_pt'] < 15)
]
data['log_trkpt'] = np.log10(data['trk_pt'])

# (logpt, eta) binning: 40 left bin edges over [-2, 2) on the x axis;
# the y axis has 12 bins starting at -3.
x_bins = 40
x_min = -2.
x_max = 2.
x_range = np.linspace(x_min, x_max, num=x_bins, endpoint=False)
y_bins = 12
y_min = -3.
Exemple #4
0
            return ret.__repr__()
        return super(EfficiencyEncoder, self).default(obj)


jinfo = {}
for dataset in ['BToKeeByDR', 'BToKeeByHits'
                ] if not args.test else ['current_test']:
    jmap_efficiencies = {}
    if args.test:
        input_files['current_test'] = glob(args.test)
    print 'plotting for', dataset
    mc = pd.DataFrame(
        get_data_sync(dataset,
                      'all',
                      exclude={
                          'gsf_hit_dpt', 'gsf_hit_dpt_unc',
                          'gsf_ecal_cluster_ematrix',
                          'ktf_ecal_cluster_ematrix'
                      }))
    mc['baseline'] = (mc.preid_trk_ecal_match |
                      (np.invert(mc.preid_trk_ecal_match)
                       & mc.preid_trkfilter_pass & mc.preid_mva_pass))

    # Choose the rows to study. The flag comparisons must be parenthesised:
    # `&` binds tighter than `==`, so `mc.is_e == 1 & (cut)` would evaluate
    # as `mc.is_e == (1 & (cut))` and select rows whose flag merely equals
    # the cut result (e.g. non-electrons outside the eta range).
    if args.allTracks:
        # All tracks inside the |trk_eta| < 2.4 range, regardless of flags.
        electrons = mc[np.abs(mc.trk_eta) < 2.4]
    else:
        # Rows flagged is_e with |gen_eta| < 2.4.
        electrons = mc[(mc.is_e == 1) & (np.abs(mc.gen_eta) < 2.4)]
    if args.fakes:
        # Overrides the selection above: rows flagged is_other with
        # |trk_eta| < 2.4 and positive trk_pt.
        electrons = mc[(mc.is_other == 1) & (np.abs(mc.trk_eta) < 2.4)
                       & (mc.trk_pt > 0.)]
    histos = {}
    seedings = [