unbiased_out = unbiased_model.predict_proba(data[unbiased_features].values)[:, 1] unbiased_out[np.isnan(unbiased_out)] = -999 #happens rarely, but happens data['biased_out'] = biased_out data['unbiased_out'] = unbiased_out data['gen_match_george'] = False data['trk_match_george'] = False to_dump.append('unbiased_out') to_dump.append('biased_out') data[data.is_e][to_dump].to_csv( '/afs/cern.ch/work/m/mverzett/public/george_sync_10_2.csv', index=False) limited = data[data.is_e][to_dump] raw = dsets.get_data_sync('debug', list(fields - {'trk_dxy_sig'})) raw = pd.DataFrame(raw) #raw = raw[raw.is_e] passed = set(zip(limited.lumi, limited.evt)) raw_passed = set(zip(raw.lumi, raw.evt)) wp = { 'biased': { "L": -0.48, "M": 0.76, "T": 1.83, }, 'unbiased': { "L": 1.03, "M": 1.75, "T": 2.61,
# Dataset name taken from the parsed arguments.
dataset = args.dataset

# Output directories for models and plots, located under $CMSSW_BASE and
# keyed by `tag`; each one is created when it does not exist yet.
mods = '%s/src/LowPtElectrons/LowPtElectrons/macros/models/%s/' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(mods):
    os.makedirs(mods)

plots = '%s/src/LowPtElectrons/LowPtElectrons/macros/plots/%s/' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(plots):
    os.makedirs(plots)

# Single-argument parenthesised print: same output on Python 2 and Python 3.
print('Getting dataset "{:s}"...'.format(dataset))
data = pd.DataFrame(
    get_data_sync(dataset, [
        'gen_pt', 'gen_eta', 'trk_pt', 'trk_eta', 'evt', 'is_e',
        'is_e_not_matched', 'is_other', 'is_egamma'
    ]))
print('...Done')

data = data[np.invert(data.is_egamma)]  # remove EGamma electrons
data = data[np.invert(data.is_e_not_matched)]  # remove non-matched electrons

# Copy the track kinematics into gsf_* columns. Item assignment is required
# here: `data.gsf_pt = ...` only sets a Python attribute on the DataFrame
# object (no column is created) and that attribute is lost as soon as `data`
# is rebound by the filter below.
data['gsf_pt'] = data.trk_pt  #@@
data['gsf_eta'] = data.trk_eta  #@@

# remove things that do not yield tracks
data = data[(data.trk_pt > 0) & (np.abs(data.trk_eta) < 2.4) &
            (data.trk_pt < 15)]
data['log_trkpt'] = np.log10(data.trk_pt)

# Every row gets unit weight; the histogram-based reweighting is disabled.
# original_weight = HistWeighter('../data/fakesWeights.txt')
data['original_weight'] = 1.  #np.invert(data.is_e)*original_weight.get_weight(data.log_trkpt, data.trk_eta)+data.is_e
# Matplotlib text setup: sans-serif Helvetica, rendered through LaTeX.
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
rc('text', usetex=True)

from datasets import get_data, tag, apply_weight, get_data_sync
import os

# Output directories for models and plots, located under $CMSSW_BASE and
# keyed by `tag`; each one is created when it does not exist yet.
mods = '%s/src/LowPtElectrons/LowPtElectrons/macros/models/%s' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(mods):
    os.makedirs(mods)

plots = '%s/src/LowPtElectrons/LowPtElectrons/macros/plots/%s' % (
    os.environ['CMSSW_BASE'], tag)
if not os.path.isdir(plots):
    os.makedirs(plots)

# Single-argument parenthesised print: same output on Python 2 and Python 3.
print('Getting dataset "{:s}"...'.format(dataset))
data = pd.DataFrame(
    get_data_sync(
        dataset,
        ['trk_pt', 'trk_eta', 'is_e', 'is_e_not_matched', 'is_other'],
    )
)
print('...Done')

# manipulate dataframe: drop the is_e_not_matched rows, keep tracks with
# 0 < trk_pt < 15 and |trk_eta| < 2.4, then add log10(trk_pt).
data = data[np.invert(data.is_e_not_matched)]
data = data[(data.trk_pt > 0) & (np.abs(data.trk_eta) < 2.4) &
            (data.trk_pt < 15)]
data['log_trkpt'] = np.log10(data.trk_pt)

# (logpt,eta) range to consider
# x axis: 40 left bin edges covering [-2, 2) (endpoint excluded).
x_bins = 40
x_min = -2.
x_max = 2.
x_range = np.linspace(x_min, x_max, x_bins, endpoint=False)
y_bins = 12
y_min = -3.
return ret.__repr__() return super(EfficiencyEncoder, self).default(obj) jinfo = {} for dataset in ['BToKeeByDR', 'BToKeeByHits' ] if not args.test else ['current_test']: jmap_efficiencies = {} if args.test: input_files['current_test'] = glob(args.test) print 'plotting for', dataset mc = pd.DataFrame( get_data_sync(dataset, 'all', exclude={ 'gsf_hit_dpt', 'gsf_hit_dpt_unc', 'gsf_ecal_cluster_ematrix', 'ktf_ecal_cluster_ematrix' })) mc['baseline'] = (mc.preid_trk_ecal_match | (np.invert(mc.preid_trk_ecal_match) & mc.preid_trkfilter_pass & mc.preid_mva_pass)) electrons = mc[mc.is_e == 1 & (np.abs(mc.gen_eta) < 2.4)] if not args.allTracks else mc[ (np.abs(mc.trk_eta) < 2.4)] if args.fakes: electrons = mc[mc.is_other == 1 & (np.abs(mc.trk_eta) < 2.4) & (mc.trk_pt > 0.)] histos = {} seedings = [