# Build one selection string per pair of adjacent edges in resolution_array:
# the lower edge is inclusive (>=) and the upper edge is exclusive (<).
bres_regions = np.array([
    'b_res_20p70>=%0.3f' % low_edge + '& b_res_20p70<%0.3f' % high_edge
    for low_edge, high_edge in zip(resolution_array[:-1], resolution_array[1:])
])

# Strip surrounding whitespace from every branch / feature name.
branch_names = [name.strip() for name in branch_names]
features = [name.strip() for name in features]
features_cat = [name.strip() for name in features_cat]

# Alias only: region_names refers to the very same array object.
region_names = bres_regions

for i_r, region in enumerate(bres_regions):
    # Selection for this region = common base selection AND the region window.
    cuts = base_cuts + '&' + region
    print(cuts)

    # Read the requested branches from the first features file, then keep
    # only the rows passing the selection.
    data_frame = rpd.read_root(
        utils.IO.featuresName[0],
        treeName,
        columns=branch_names,
    ).query(cuts)

    X_features = preprocessing.set_features(
        treeName, branch_names, features, cuts)
    X_features_cat = preprocessing.set_features(
        treeName, branch_names, features_cat, cuts)
    # NOTE(review): a fraction of 0. presumably puts every event in the
    # test sample -- confirm against preprocessing.get_test_sample.
    X_test_features = preprocessing.get_test_sample(
        pd.DataFrame(X_features), 0.)

    # dictVar maps a branch name to its column index in nTot.
    # NOTE(review): the meaning of the trailing 5 is not visible here.
    nTot, dictVar = postprocessing.stackFeaturesReg(
        data_frame, branch_names, 5)
    true_pt = nTot[:, dictVar['Jet_mcPt']]
    predictions_pt_caterina = nTot[:, dictVar['Jet_pt_reg']]
    reco_pt = nTot[:, dictVar['Jet_pt']]

    # Despite the rel_diff_* names these are plain ratios: Jet_mcPt divided
    # by the Jet_pt_reg column and by the Jet_pt column respectively.
    rel_diff_caterina = true_pt / predictions_pt_caterina
    rel_diff_noreg = true_pt / reco_pt

    # outTags = ['quantile_minmax_02-07','quantile_04']
    outTags = ['quantile_04']
    X_predictions_compare = []
    for num in range(len(outTags)):
        outTag = outTags[num]
# Disabled alternative configurations, kept for reference.
#features = 'Jet_pt,Jet_eta,nPVs,Jet_mt,Jet_leadTrackPt,Jet_leptonPtRel,Jet_leptonPt,Jet_leptonDeltaR,Jet_neHEF,Jet_neEmEF,Jet_vtxPt,Jet_vtxMass,Jet_vtx3dL,Jet_vtxNtrk,Jet_vtx3deL'.split(",") #same as Caterina
#target = 'Jet_mcPt'.split(",")
####cuts='(Jet_mcPt > 20) & (Jet_eta<2.5 & Jet_eta>-2.5)'
#cuts='(Jet_pt > 20) & (Jet_eta<2.5 & Jet_eta>-2.5) '
#########################################

# Strip surrounding whitespace from every branch / feature / target name.
branch_names = [c.strip() for c in branch_names]
features = [c.strip() for c in features]
target = [c.strip() for c in target]
#print branch_names

import pandas as pd
import root_pandas as rpd

# Load the feature and target columns from the tree named "tree" with the
# selection in `cuts`, then pass both through randomize_ft together.
# NOTE(review): randomize_ft presumably applies one common shuffle so rows
# stay aligned -- confirm against the preprocessing module.
X_features = preprocessing.set_features("tree", branch_names, features, cuts)
X_target = preprocessing.set_target("tree", branch_names, target, cuts)
X_features, X_target = preprocessing.randomize_ft(X_features, X_target)

# Training and test samples are both requested with the same 0.9 argument.
# NOTE(review): 0.9 is presumably the training fraction -- confirm.
X_train_target = preprocessing.get_training_sample(pd.DataFrame(X_target), 0.9)
X_train_features = preprocessing.get_training_sample(pd.DataFrame(X_features), 0.9)

X_test_target = preprocessing.get_test_sample(pd.DataFrame(X_target), 0.9)
X_test_features = preprocessing.get_test_sample(pd.DataFrame(X_features), 0.9)

# NOTE(review): the original remark below contradicts the active 0.9 split
# above; the two lines it introduces are disabled.
#now no need to split in test and training because i am not using testing here
#X_train_target = preprocessing.get_test_sample(pd.DataFrame(X_target),0.) #0.8
#X_train_features = preprocessing.get_test_sample(pd.DataFrame(X_features),0.)

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. Try the original import first so existing environments behave exactly
# as before, and fall back to the standalone joblib package otherwise.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib