def convert_bdt__AdaBoost(sklearn_bdt_clf, input_var_list, tmva_outfile_xml):
    """Write a fitted binary AdaBoost BDT as a TMVA ``BDT::BDT`` weights XML file.

    The generated document has the layout::

        <MethodSetup Method="BDT::BDT">
          <Variables NVar="..."> <Variable .../> ... </Variables>
          <GeneralInfo> <Info name="Creator" .../> ... </GeneralInfo>
          <Options> <Option name="NodePurityLimit" ...>...</Option> ... </Options>
          <Weights NTrees="..." AnalysisType="0">
            <BinaryTree type="DecisionTree" boostWeight="..." itree="0"> ...
          </Weights>
        </MethodSetup>

    Args:
        sklearn_bdt_clf: Fitted classifier. ``n_classes_``, ``estimators_``
            and ``estimator_weights_`` are read from it.
        input_var_list: Sequence of ``(name, type)`` pairs. The order must be
            *exactly* the column order of the training array, e.g.
            ``[('m_el_pt', 'F'), ('m_el_eta', 'F'), ('m_el_sigd0PV', 'F')]``.
        tmva_outfile_xml: Destination handed to ``ElementTree.write``.

    Exits the interpreter through ``sys.exit`` when the classifier was not
    trained on exactly two classes.
    """
    clf = sklearn_bdt_clf

    if clf.n_classes_ != 2:
        sys.exit("Error: Number of classes in sklearn classifier is not equal 2.")

    var_list = input_var_list

    # Count the trees that are actually stored: boosting may terminate early,
    # leaving fewer fitted estimators than the requested ``n_estimators``.
    # The NTrees attribute must match the number of <BinaryTree> children.
    n_trees = len(clf.estimators_)

    # <MethodSetup>
    method_setup = ET.Element("MethodSetup", Method="BDT::BDT")

    # <Variables>
    variables = ET.SubElement(method_setup, "Variables", NVar=str(len(var_list)))
    for ind, val in enumerate(var_list):
        name = val[0]
        var_type = val[1]
        ET.SubElement(variables, "Variable",
                      VarIndex=str(ind), Type=var_type,
                      Expression=name, Label=name, Title=name,
                      Unit="", Internal=name,
                      Min="0.0e+00", Max="0.0e+00")

    # <GeneralInfo>
    general_info = ET.SubElement(method_setup, "GeneralInfo")
    ET.SubElement(general_info, "Info", name="Creator", value="Koza4ok (skTMVA)")
    ET.SubElement(general_info, "Info", name="AnalysisType", value="Classification")

    # <Options>
    options = ET.SubElement(method_setup, "Options")
    purity_option = ET.SubElement(options, "Option",
                                  name="NodePurityLimit", modified="No")
    # NodePurityLimit is a module-level setting defined outside this function.
    purity_option.text = str(NodePurityLimit)
    boost_option = ET.SubElement(options, "Option",
                                 name="BoostType", modified="Yes")
    boost_option.text = "AdaBoost"

    # <Weights>
    weights = ET.SubElement(method_setup, "Weights",
                            NTrees=str(n_trees), AnalysisType="0")

    for idx, dt in enumerate(clf.estimators_):
        tree_weight = clf.estimator_weights_[idx]
        # <BinaryTree type="DecisionTree" boostWeight="9.2106320437773737e-01" itree="0">
        binary_tree = ET.SubElement(weights, "BinaryTree",
                                    type="DecisionTree",
                                    boostWeight=str(tree_weight),
                                    itree=str(idx))
        # Serialise the tree, starting at node 0 as the root ("s" position).
        build_xml_tree__AdaBoost(dt, 0, "s", -1, binary_tree)

    # Create XML-tree structure and save it to file
    tree = ET.ElementTree(method_setup)
    tree.write(tmva_outfile_xml)
def convert_bdt__Grad(sklearn_bdt_clf, input_var_list, tmva_outfile_xml):
    """Write a fitted binary gradient-boosting BDT as a TMVA ``BDT::BDT`` XML file.

    The generated document has the layout::

        <MethodSetup Method="BDT::BDT">
          <Variables NVar="..."> <Variable .../> ... </Variables>
          <GeneralInfo> <Info name="Creator" .../> ... </GeneralInfo>
          <Options> <Option name="NodePurityLimit" ...>...</Option> ... </Options>
          <Weights NTrees="..." AnalysisType="1">
            <BinaryTree type="DecisionTree" boostWeight="1.0e+00" itree="0"> ...
          </Weights>
        </MethodSetup>

    Args:
        sklearn_bdt_clf: Fitted classifier. ``n_classes_`` and the 2-D
            ``estimators_`` array are read from it.
        input_var_list: Sequence of ``(name, type)`` pairs. The order must be
            *exactly* the column order of the training array, e.g.
            ``[('m_el_pt', 'F'), ('m_el_eta', 'F'), ('m_el_sigd0PV', 'F')]``.
        tmva_outfile_xml: Destination handed to ``ElementTree.write``.

    Exits the interpreter through ``sys.exit`` when the classifier is not a
    binary one (more than one regression tree per boosting stage, or a class
    count different from two).
    """
    clf = sklearn_bdt_clf

    # We support only binary classification.
    # From http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
    # estimators_ : ndarray of DecisionTreeRegressor, shape = [n_estimators, loss_.K]
    # where loss_.K is 1 for binary classification, otherwise n_classes.
    # The second dimension is inspected instead of ``loss_.K`` so the check
    # does not rely on the ``loss_`` attribute being present.
    if clf.estimators_.shape[1] != 1:
        sys.exit("Error: Only binary classification is supported for regression trees.")

    if clf.n_classes_ != 2:
        sys.exit("Error: Number of classes in sklearn classifier is not equal 2.")

    var_list = input_var_list

    # One regression tree per boosting stage is exported (column 0).
    stage_trees = clf.estimators_[:, 0]

    # Count the stages that were actually fitted: with early stopping this can
    # be smaller than the requested ``n_estimators``, and NTrees must match
    # the number of <BinaryTree> children written below.
    n_trees = len(stage_trees)

    # <MethodSetup>
    method_setup = ET.Element("MethodSetup", Method="BDT::BDT")

    # <Variables>
    variables = ET.SubElement(method_setup, "Variables", NVar=str(len(var_list)))
    for ind, val in enumerate(var_list):
        name = val[0]
        var_type = val[1]
        ET.SubElement(variables, "Variable",
                      VarIndex=str(ind), Type=var_type,
                      Expression=name, Label=name, Title=name,
                      Unit="", Internal=name,
                      Min="0.0e+00", Max="0.0e+00")

    # <GeneralInfo>
    general_info = ET.SubElement(method_setup, "GeneralInfo")
    ET.SubElement(general_info, "Info", name="Creator", value="Koza4ok (skTMVA)")
    ET.SubElement(general_info, "Info", name="AnalysisType", value="Classification")

    # <Options>
    options = ET.SubElement(method_setup, "Options")
    purity_option = ET.SubElement(options, "Option",
                                  name="NodePurityLimit", modified="No")
    # NodePurityLimit is a module-level setting defined outside this function.
    purity_option.text = str(NodePurityLimit)
    boost_option = ET.SubElement(options, "Option",
                                 name="BoostType", modified="Yes")
    boost_option.text = "Grad"

    # <Weights>
    weights = ET.SubElement(method_setup, "Weights",
                            NTrees=str(n_trees), AnalysisType="1")

    for idx, dt in enumerate(stage_trees):
        # Every tree is written with the same unit boost weight.
        # <BinaryTree type="DecisionTree" boostWeight="1.0e+00" itree="0">
        binary_tree = ET.SubElement(weights, "BinaryTree",
                                    type="DecisionTree",
                                    boostWeight="1.0e+00",
                                    itree=str(idx))
        # Serialise the tree, starting at node 0 as the root ("s" position).
        build_xml_tree__Grad(dt, 0, "s", -1, binary_tree)

    # Create XML-tree structure and save it to file
    tree = ET.ElementTree(method_setup)
    tree.write(tmva_outfile_xml)
var_type = val[1] Variable = ET.SubElement(Variables, "Variable", VarIndex=str(ind), Type=val[1], Expression=name, Label=name, Title=name, Unit="", Internal=name, Min="0.0e+00", Max="0.0e+00") # <GeneralInfo> GeneralInfo = ET.SubElement(MethodSetup, "GeneralInfo") Info_Creator = ET.SubElement(GeneralInfo, "Info", name="Creator", value="Koza4ok (skTMVA)") Info_AnalysisType = ET.SubElement(GeneralInfo, "Info", name="AnalysisType", value="Classification") # <Options> Options = ET.SubElement(MethodSetup, "Options") Option_NodePurityLimit = ET.SubElement(Options, "Option", name="NodePurityLimit", modified="No").text = str(NodePurityLimit) # <Weights> Weights = ET.SubElement(MethodSetup, "Weights", NTrees=str(NTrees), AnalysisType="0") for idx, dt in enumerate(clf.estimators_): tree_weight = clf.estimator_weights_[idx] # <BinaryTree type="DecisionTree" boostWeight="9.2106320437773737e-01" itree="0"> BinaryTree = ET.SubElement(Weights, "BinaryTree", type="DecisionTree", boostWeight=str(tree_weight), itree=str(idx)) build_xml_tree(dt, 0, "s", -1, BinaryTree) tree = ET.ElementTree(MethodSetup) tree.write("SKLearn_BDT_muons.weights.xml") # Save to file fpr, tpr #np.savez('output_fullsim_v3_electrons_fpr_tpr_10per.npz', # fpr=fpr, tpr=tpr)
def convert_bdt__XGBClassifier(sklearn_bdt_clf, input_var_list, tmva_outfile_xml):
    """Write a fitted binary XGBoost classifier as a TMVA ``BDT::BDT`` XML file.

    The generated document has the layout::

        <MethodSetup Method="BDT::BDT">
          <Variables NVar="..."> <Variable .../> ... </Variables>
          <GeneralInfo> <Info name="Creator" .../> ... </GeneralInfo>
          <Options> <Option name="NodePurityLimit" ...>...</Option> ... </Options>
          <Weights NTrees="..." AnalysisType="0">
            <BinaryTree type="DecisionTree" boostWeight="..." itree="0"> ...
          </Weights>
        </MethodSetup>

    Args:
        sklearn_bdt_clf: Fitted classifier. ``n_classes_``, ``n_estimators``
            and ``evals_result()`` are read from it; the evaluation history
            must contain a ``'validation_0'`` / ``'logloss'`` series with at
            least ``n_estimators`` entries.
        input_var_list: Sequence of ``(name, type)`` pairs. The order must be
            *exactly* the column order of the training array, e.g.
            ``[('m_el_pt', 'F'), ('m_el_eta', 'F'), ('m_el_sigd0PV', 'F')]``.
        tmva_outfile_xml: Destination handed to ``ElementTree.write``.

    Exits the interpreter through ``sys.exit`` when the classifier was not
    trained on exactly two classes.
    """
    clf = sklearn_bdt_clf

    if clf.n_classes_ != 2:
        sys.exit(
            "Error: Number of classes in sklearn classifier is not equal 2.")

    var_list = input_var_list

    # Run-time parameters
    n_trees = clf.n_estimators

    # <MethodSetup>
    method_setup = ET.Element("MethodSetup", Method="BDT::BDT")

    # <Variables>
    variables = ET.SubElement(method_setup, "Variables", NVar=str(len(var_list)))
    for ind, val in enumerate(var_list):
        name = val[0]
        var_type = val[1]
        ET.SubElement(variables, "Variable",
                      VarIndex=str(ind), Type=var_type,
                      Expression=name, Label=name, Title=name,
                      Unit="", Internal=name,
                      Min="0.0e+00", Max="0.0e+00")

    # <GeneralInfo>
    general_info = ET.SubElement(method_setup, "GeneralInfo")
    ET.SubElement(general_info, "Info", name="Creator", value="Koza4ok (skTMVA)")
    ET.SubElement(general_info, "Info", name="AnalysisType", value="Classification")

    # <Options>
    options = ET.SubElement(method_setup, "Options")
    purity_option = ET.SubElement(options, "Option",
                                  name="NodePurityLimit", modified="No")
    # NodePurityLimit is a module-level setting defined outside this function.
    purity_option.text = str(NodePurityLimit)
    # NOTE(review): the boost type is declared as "AdaBoost" while the per-tree
    # weight below is a validation logloss value -- confirm this is intended.
    boost_option = ET.SubElement(options, "Option",
                                 name="BoostType", modified="Yes")
    boost_option.text = "AdaBoost"

    # <Weights>
    weights = ET.SubElement(method_setup, "Weights",
                            NTrees=str(n_trees), AnalysisType="0")

    # The evaluation history does not change between iterations, so fetch it
    # once instead of once per tree (and not at all when there are no trees).
    if n_trees > 0:
        logloss_history = clf.evals_result()['validation_0']['logloss']
    else:
        logloss_history = []

    for idx in range(n_trees):
        tree_weight = logloss_history[idx]
        # <BinaryTree type="DecisionTree" boostWeight="9.2106320437773737e-01" itree="0">
        binary_tree = ET.SubElement(weights, "BinaryTree",
                                    type="DecisionTree",
                                    boostWeight=str(tree_weight),
                                    itree=str(idx))
        # NOTE(review): the whole classifier (not the idx-th tree) is passed on
        # every iteration -- confirm the helper picks the right tree itself.
        build_xml_tree__XGBClassifier(sklearn_bdt_clf, 0, "s", -1, binary_tree)

    # Create XML-tree structure and save it to file
    tree = ET.ElementTree(method_setup)
    tree.write(tmva_outfile_xml)
def convert_bdt__Grad(sklearn_bdt_clf, input_var_list, tmva_outfile_xml):
    """Export a fitted binary gradient-boosting BDT to a TMVA ``BDT::BDT`` XML file.

    Document layout produced::

        <MethodSetup Method="BDT::BDT">
          <Variables NVar="..."> <Variable .../> ... </Variables>
          <GeneralInfo> <Info name="Creator" .../> ... </GeneralInfo>
          <Options> <Option name="NodePurityLimit" ...>...</Option> ... </Options>
          <Weights NTrees="..." AnalysisType="1">
            <BinaryTree type="DecisionTree" boostWeight="1.0e+00" itree="0"> ...
          </Weights>
        </MethodSetup>

    Args:
        sklearn_bdt_clf: Fitted classifier. ``n_classes_`` and the 2-D
            ``estimators_`` array are read from it.
        input_var_list: Sequence of ``(name, type)`` pairs. The order must be
            *exactly* the column order of the training array, e.g.
            ``[('m_el_pt', 'F'), ('m_el_eta', 'F'), ('m_el_sigd0PV', 'F')]``.
        tmva_outfile_xml: Destination handed to ``ElementTree.write``.

    Exits the interpreter through ``sys.exit`` when the classifier is not a
    binary one (more than one regression tree per boosting stage, or a class
    count different from two).
    """
    clf = sklearn_bdt_clf

    # We support only binary classification.
    # From http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
    # estimators_ : ndarray of DecisionTreeRegressor, shape = [n_estimators, loss_.K]
    # where loss_.K is 1 for binary classification, otherwise n_classes.
    # Checking the array's second dimension avoids depending on the ``loss_``
    # attribute being available on the classifier.
    if clf.estimators_.shape[1] != 1:
        sys.exit(
            "Error: Only binary classification is supported for regression trees."
        )

    if clf.n_classes_ != 2:
        sys.exit(
            "Error: Number of classes in sklearn classifier is not equal 2.")

    var_list = input_var_list

    # Only column 0 is exported: one regression tree per boosting stage.
    stage_trees = clf.estimators_[:, 0]

    # NTrees has to equal the number of <BinaryTree> children emitted below;
    # the fitted stage count can be lower than ``n_estimators`` when training
    # stopped early, so count the stored trees instead.
    n_trees = len(stage_trees)

    # <MethodSetup>
    root = ET.Element("MethodSetup", Method="BDT::BDT")

    # <Variables>
    variables_node = ET.SubElement(root, "Variables", NVar=str(len(var_list)))
    for position, entry in enumerate(var_list):
        name = entry[0]
        var_type = entry[1]
        ET.SubElement(variables_node, "Variable",
                      VarIndex=str(position), Type=var_type,
                      Expression=name, Label=name, Title=name,
                      Unit="", Internal=name,
                      Min="0.0e+00", Max="0.0e+00")

    # <GeneralInfo>
    info_node = ET.SubElement(root, "GeneralInfo")
    ET.SubElement(info_node, "Info", name="Creator", value="Koza4ok (skTMVA)")
    ET.SubElement(info_node, "Info", name="AnalysisType", value="Classification")

    # <Options>
    options_node = ET.SubElement(root, "Options")
    purity_option = ET.SubElement(options_node, "Option",
                                  name="NodePurityLimit", modified="No")
    # NodePurityLimit is a module-level setting defined outside this function.
    purity_option.text = str(NodePurityLimit)
    boost_option = ET.SubElement(options_node, "Option",
                                 name="BoostType", modified="Yes")
    boost_option.text = "Grad"

    # <Weights>
    weights_node = ET.SubElement(root, "Weights",
                                 NTrees=str(n_trees), AnalysisType="1")

    for idx, dt in enumerate(stage_trees):
        # All trees carry the same unit boost weight.
        # <BinaryTree type="DecisionTree" boostWeight="1.0e+00" itree="0">
        tree_node = ET.SubElement(weights_node, "BinaryTree",
                                  type="DecisionTree",
                                  boostWeight="1.0e+00",
                                  itree=str(idx))
        # Serialise the tree, starting at node 0 as the root ("s" position).
        build_xml_tree__Grad(dt, 0, "s", -1, tree_node)

    # Create XML-tree structure and save it to file
    document = ET.ElementTree(root)
    document.write(tmva_outfile_xml)
build_xml_tree(children_right[node_id], "r", node_depth, node_elementTree) else: # leaf node node_depth = parent_depth + 1 # node parameters pos = "s" if node_id == 0 else node_pos depth = node_depth IVar = -1 global NodePurityLimit sig = value[node_id][0][1] bkg = value[node_id][0][0] total = float(sig + bkg) purity = float(sig)/total nType = 1 if purity >= NodePurityLimit else -1 node_elementTree = ET.SubElement(parent_elementTree, "Node", pos=pos, depth=str(depth), NCoef="0", IVar=str(IVar), Cut="0.0e+00", cType="1", res="0.0e+01", rms="0.0e+00", purity=str(purity), nType=str(nType)) BinaryTree = ET.Element("BinaryTree") build_xml_tree(0, "s", -1, BinaryTree) tree = ET.ElementTree(BinaryTree) tree.write("filename.xml") # Save to file fpr, tpr #np.savez('output_fullsim_v3_electrons_fpr_tpr_10per.npz', # fpr=fpr, tpr=tpr)