def Init(train, test, VarList):
    """Initialise TMVA and build a factory plus a filled data loader.

    :param train: sample object exposing ``Events``, ``OutTrue`` and ``Weights``;
        its events are registered as training events
    :param test: sample object with the same attributes; its events are
        registered as test events
    :param VarList: iterable of variable names to declare on the data loader
    :return: tuple ``(dataloader, factory, output)`` where ``output`` is the
        ROOT file opened in ``RECREATE`` mode that backs the factory
    """
    # TMVA (and PyMethodBase) are initialised before anything else is created.
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()

    output = ROOT.TFile.Open('~/Data/NNOutput.root', 'RECREATE')
    factory_options = '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification'
    factory = ROOT.TMVA.Factory('TMVAClassification', output, factory_options)

    dataloader = ROOT.TMVA.DataLoader('dataset')
    for name in VarList:
        dataloader.AddVariable(name)

    # add_classification_events comes from root_numpy.tmva. The training
    # sample is added with the helper's default, the test sample with
    # test=True; everything else is passed identically.
    for sample, extra in ((train, {}), (test, {'test': True})):
        add_classification_events(dataloader,
                                  sample.Events,
                                  sample.OutTrue,
                                  weights=sample.Weights,
                                  signal_label=1,
                                  **extra)

    # No selection cut; only the split seed is fixed.
    dataloader.PrepareTrainingAndTestTree(ROOT.TCut(''), 'SplitSeed=100')

    return dataloader, factory, output
    def train(self, train_data, classification_variables, variable_dict, sample_name, grid_search):
        """
        Definition:
        -----------
            Training method for RootTMVA; it books a gradient-boosted BDT, trains it and moves
            the resulting "weights" folder into <output_directory>/<sample_name>/<name>
        Args:
        -----
            train_data = dictionary, containing "X", "y", "w" for the training set, where:
                X = ndarray of dim (# training examples, # features)
                y = array of dim (# training examples) with target values
                w = array of dim (# training examples) with event weights
            classification_variables = list of names of variables used for classification
            variable_dict = ordered dict, mapping all the branches from the TTree to their type
            sample_name = string that specifies the file name of the sample being trained on
            grid_search = accepted for interface compatibility; not used by this method
        """
        # The model directory is used for the ROOT output file and for the weights folder.
        model_dir = os.path.join(self.output_directory, sample_name, self.name)
        utils.ensure_directory(model_dir)

        f_output = TFile(os.path.join(model_dir, "TMVA_output.root"), "RECREATE")
        try:
            factory = TMVA.Factory("TMVAClassification", f_output, "AnalysisType=Classification")

            # -- Add variables to the factory:
            for v_name in classification_variables:
                factory.AddVariable(v_name, variable_dict[v_name])

            # Call root_numpy's utility functions to add events from the arrays
            add_classification_events(factory, train_data["X"], train_data["y"], weights=train_data["w"])
            # need to add some testing events or TMVA will complain, so the training set is reused
            add_classification_events(factory, train_data["X"], train_data["y"], weights=train_data["w"], test=True)

            # The following line is necessary if events have been added individually:
            factory.PrepareTrainingAndTestTree(TCut("1"), "NormMode=EqualNumEvents")

            # -- Define methods:
            bdt_options = [
                "NTrees=300",
                "MinNodeSize=0.01",
                "MaxDepth=8",
                "BoostType=Grad",
                "SeparationType=GiniIndex",
                "NegWeightTreatment=Pray",
            ]
            factory.BookMethod(TMVA.Types.kBDT, "BDT", ":".join(bdt_options))

            # -- Where stuff actually happens:
            logging.getLogger("root_tmva").info("Train all methods")
            factory.TrainAllMethods()
        finally:
            # Close the output file even if booking or training fails.
            f_output.Close()

        # -- Organize output: replace any previous weights folder with the fresh one
        #    that was written to the current working directory.
        logging.getLogger("root_tmva").info("Organising output")
        weights_dir = os.path.join(model_dir, "weights")
        if os.path.isdir(weights_dir):
            shutil.rmtree(weights_dir)
        shutil.move("weights", model_dir)
Example #3
0
def tmva_process(classifier, info, data, labels, sample_weight):
    """
    Create a TMVA factory, register the data, book one method and train it.

    The same events are added both as training and as test events. The ROOT
    output file is closed when the function finishes, whether or not training
    succeeded.

    :param classifier: classifier to train
    :type classifier: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: train data
    :param labels: array-like - targets
    :param sample_weight: array-like - weights
    :raises NotImplementedError: if ``info.model_type`` is neither
        ``"classification"`` nor ``"regression"``
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        print(classifier.factory_options)
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, classifier.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == "classification":
            if classifier.method == "kCuts":
                # signal must be the first added tree, otherwise rectangular cut optimization doesn't work
                inds = numpy.argsort(labels)[::-1]
                # argsort yields positions, so the frame must be indexed positionally
                # (.ix was label-based on integer indexes and is gone in pandas >= 1.0).
                data = data.iloc[inds, :]
                labels = labels[inds]
                sample_weight = sample_weight[inds]
            features = numpy.array(data)
            add_classification_events(factory, features, labels, weights=sample_weight)
            add_classification_events(factory, features, labels, weights=sample_weight, test=True)
        elif info.model_type == "regression":
            factory.AddTarget("target")
            features = numpy.array(data)
            add_regression_events(factory, features, labels, weights=sample_weight)
            add_regression_events(factory, features, labels, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut("1"), "")

        # Set method: options are passed to TMVA as "key=value" pairs joined by ":"
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value) for key, value in classifier.method_parameters.items()
        )
        # classifier.method names a member of ROOT.TMVA.Types (e.g. "kCuts")
        method_type = getattr(ROOT.TMVA.Types, classifier.method)
        factory.BookMethod(method_type, classifier._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # Do not leave the output file open if any step above raised.
        file_out.Close()
Example #4
0
def tmva_process(classifier, info, data, labels, sample_weight):
    """
    Create a TMVA factory, feed it the data, book a single method and train it.

    Every event is registered twice: once as a training event and once as a
    test event. The ROOT output file is always closed before returning or
    propagating an error.

    :param classifier: classifier to train
    :type classifier: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: train data
    :param labels: array-like - targets
    :param sample_weight: array-like - weights
    :raises NotImplementedError: if ``info.model_type`` is neither
        ``'classification'`` nor ``'regression'``
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        print(classifier.factory_options)
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, classifier.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if classifier.method == 'kCuts':
                # signal must be the first added tree, otherwise rectangular cut optimization doesn't work
                inds = numpy.argsort(labels)[::-1]
                # Positional indexing: argsort returns positions, not index labels.
                # (.ix was removed in pandas 1.0 and was label-based on integer indexes.)
                data = data.iloc[inds, :]
                labels = labels[inds]
                sample_weight = sample_weight[inds]
            events = numpy.array(data)
            add_classification_events(factory, events, labels, weights=sample_weight)
            add_classification_events(factory, events, labels, weights=sample_weight, test=True)
        elif info.model_type == 'regression':
            factory.AddTarget('target')
            events = numpy.array(data)
            add_regression_events(factory, events, labels, weights=sample_weight)
            add_regression_events(factory, events, labels, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")

        # Set method: TMVA takes its options as "key=value" pairs joined by ":"
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value) for key, value in classifier.method_parameters.items())
        # Resolve the method name (e.g. 'kCuts') to the ROOT.TMVA.Types member
        tmva_method = getattr(ROOT.TMVA.Types, classifier.method)
        factory.BookMethod(tmva_method, classifier._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # Release the output file on both the success and the failure path.
        file_out.Close()
Example #5
0
def tmva_process(estimator, info, data, target, sample_weight):
    """
    Create a TMVA classification/regression factory and train the booked method.

    The supplied events are added both as training and as test events. The
    ROOT output file is closed in all cases, including when a step fails.

    :param estimator: classifier/regressor which should be trained
    :type estimator: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: training data
    :param target: array-like targets
    :param sample_weight: array-like samples weights
    :raises NotImplementedError: if ``info.model_type`` is neither
        ``'classification'`` nor ``'regression'``
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, estimator.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if estimator.method == 'kCuts':
                # signal must be the first added to the tree, because method
                # *rectangular cut optimization* doesn't work in another way
                inds = numpy.argsort(target)[::-1]
                # argsort gives row positions, so select rows by position
                # (.ix no longer exists in pandas >= 1.0 and was label-based on integer indexes).
                data = data.iloc[inds, :]
                target = target[inds]
                sample_weight = sample_weight[inds]
            matrix = numpy.array(data)
            add_classification_events(factory, matrix, target, weights=sample_weight)
            add_classification_events(factory, matrix, target, weights=sample_weight, test=True)
        elif info.model_type == 'regression':
            factory.AddTarget('target')
            matrix = numpy.array(data)
            add_regression_events(factory, matrix, target, weights=sample_weight)
            add_regression_events(factory, matrix, target, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")

        # Set method: options are handed to TMVA as "key=value" pairs joined by ":"
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value) for key, value in estimator.method_parameters.items())
        # estimator.method is the name of a ROOT.TMVA.Types member (e.g. 'kCuts')
        method_type = getattr(ROOT.TMVA.Types, estimator.method)
        factory.BookMethod(method_type, estimator._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # Always close the output file, even if booking or training raised.
        file_out.Close()
Example #6
0
# Rows from n_events onwards are the ones later added as test events.
X_test, y_test, w_test = X[n_events:], y[n_events:], w[n_events:]

output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('classifier', output,
                       'AnalysisType=Multiclass:'
                       '!V:Silent:!DrawProgressBar')

# From ROOT 6.07/04 on, variables and events go through a TMVA.DataLoader and
# BookMethod is called on the factory instance; before that, the factory
# itself plays the data-holder role and BookMethod is called unbound with it.
use_dataloader = ROOT_VERSION >= '6.07/04'
data = TMVA.DataLoader('.') if use_dataloader else factory
BookMethod = factory.BookMethod if use_dataloader else TMVA.Factory.BookMethod

# Two float ('F') input variables named f0 and f1.
for n in range(2):
    data.AddVariable('f{0}'.format(n), 'F')

# root_numpy's helpers copy the numpy arrays into TMVA event by event ...
add_classification_events(data, X_train, y_train, weights=w_train)
add_classification_events(data, X_test, y_test, weights=w_test, test=True)
# ... which makes this call mandatory before booking any method.
data.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')

# Book and train a single MLP.
BookMethod(data, 'MLP', 'MLP',
           'NeuronType=tanh:NCycles=200:HiddenLayers=N+2,2:'
           'TestRate=5:EstimatorType=MSE')
factory.TrainAllMethods()

# Classify the test dataset with the MLP
Example #7
0
def tmva_process(estimator, info, data, target, sample_weight):
    """
    Create a TMVA factory and data loader, register the data and train the booked method.

    Variables, the regression target and all events are registered on the
    ``DataLoader``; the factory only books and trains the method. The supplied
    events are added both as training and as test events. The ROOT output file
    is closed in all cases, including when a step fails.

    :param estimator: classifier/regressor which should be trained
    :type estimator: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: training data
    :param target: array-like targets
    :param sample_weight: array-like samples weights
    :raises NotImplementedError: if ``info.model_type`` is neither
        ``'classification'`` nor ``'regression'``
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root),
                          "RECREATE")
    try:
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out,
                                    estimator.factory_options)
        dataloader = ROOT.TMVA.DataLoader("DataLoader")

        for var in data.columns:
            dataloader.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if estimator.method == 'kCuts':
                # signal must be the first added to the tree, because method
                # *rectangular cut optimization* doesn't work in another way
                inds = numpy.argsort(target)[::-1]
                # argsort gives row positions, so select rows by position
                # (.ix no longer exists in pandas >= 1.0 and was label-based
                # on integer indexes).
                data = data.iloc[inds, :]
                target = target[inds]
                sample_weight = sample_weight[inds]
            features = numpy.array(data)
            add_classification_events(dataloader,
                                      features,
                                      target,
                                      weights=sample_weight)
            add_classification_events(dataloader,
                                      features,
                                      target,
                                      weights=sample_weight,
                                      test=True)
        elif info.model_type == 'regression':
            # The target belongs on the data loader, next to the variables
            # and the events that are registered there.
            dataloader.AddTarget('target')
            features = numpy.array(data)
            add_regression_events(dataloader,
                                  features,
                                  target,
                                  weights=sample_weight)
            add_regression_events(dataloader,
                                  features,
                                  target,
                                  weights=sample_weight,
                                  test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(
                info.model_type))

        dataloader.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")

        # Set method: options are handed to TMVA as "key=value" pairs joined by ":"
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value)
            for key, value in estimator.method_parameters.items())
        # estimator.method is the name of a ROOT.TMVA.Types member (e.g. 'kCuts')
        method_type = getattr(ROOT.TMVA.Types, estimator.method)
        factory.BookMethod(dataloader, method_type, estimator._method_name,
                           parameters)

        factory.TrainAllMethods()
    finally:
        # Always close the output file, even if booking or training raised.
        file_out.Close()