Exemple #1
0
def plot_hist2d_auto(df, branches, **kwargs):
    """ Look up the latex names and units of the two branches, then
    delegate the actual plotting to :py:func:`plot_hist2d`.

    Parameters
    ----------
    df        : pandas.Dataframe
        Dataframe that contains the branches
    branches  : [str, str]
        names of the two branches
    **kwargs  : dict
        arguments passed in :py:func:`plot_hist_2D` (except ``branches``, ``latex_branches`` and ``units``)

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure of the plot (only if ``ax`` is not specified)
    ax : matplotlib.figure.Axes
        Axis of the plot (only if ``ax`` is not specified)
    """
    tex_names, branch_units = pt.get_latex_branches_units(branches)

    # Ensure 'data_name' exists in kwargs, then derive the folder name from it.
    add_in_dic('data_name', kwargs)
    pt._set_folder_name_from_data_name(kwargs, kwargs['data_name'])

    return plot_hist2d(
        df,
        branches,
        latex_branches=tex_names,
        units=branch_units,
        **kwargs)
Exemple #2
0
def plot_hist_fit_auto(df, branch, cut_BDT=None, **kwargs):
    """ Retrieve the latex name of the branch and its unit, set the folder
    name to the name of the dataset, then plot the histogram together with
    the fitted curve with :py:func:`plot_hist_fit`.

    Parameters
    ----------
    df            : pandas.Dataframe
        dataframe that contains the branch to plot
    branch : str
        branch (for instance: ``'B0_M'``), in dataframe
    cut_BDT         : float or str
        ``BDT > cut_BDT`` cut. Used in the name of saved figure.
    **kwargs : dict
        arguments passed in :py:func:`plot_hist_fit` (except ``branch``, ``latex_branch``, ``unit``)

    Returns
    -------
    fig   : matplotlib.figure.Figure
        Figure of the plot (only if ``axis_mode`` is ``False``)
    ax[0] : matplotlib.figure.Axes
        Axis of the histogram + fitted curves + table
    ax[1] : matplotlib.figure.Axes
        Axis of the pull diagram (only if ``plot_pull`` is ``True``)
    """
    latex_branch, unit = pt.get_latex_branches_units(branch)

    # Make sure the keys used below exist in kwargs (defaulted if absent).
    add_in_dic('fig_name', kwargs)
    add_in_dic('title', kwargs)
    add_in_dic('data_name', kwargs)

    # Figure name and title reflect the BDT cut (if any).
    kwargs['fig_name'] = pt._get_fig_name_given_BDT_cut(
        fig_name=kwargs['fig_name'],
        cut_BDT=cut_BDT,
        branch=branch,
        data_name=string.add_text(kwargs['data_name'], 'fit', '_', None))

    kwargs['title'] = pt._get_title_given_BDT_cut(title=kwargs['title'],
                                                  cut_BDT=cut_BDT)

    # Name of the folder = name of the data, unless explicitly provided.
    add_in_dic('folder_name', kwargs)

    if kwargs['folder_name'] is None and kwargs['data_name'] is not None:
        kwargs['folder_name'] = kwargs['data_name']

    return plot_hist_fit(df,
                         branch,
                         latex_branch=latex_branch,
                         unit=unit,
                         **kwargs)
Exemple #3
0
def plot_hist_auto(dfs, branch, cut_BDT=None, **kwargs):
    """ Retrieve the latex name of the branch and unit.
    Then, plot histogram with :py:func:`plot_hist`.

    Parameters
    ----------
    dfs             : dict(str:pandas.Dataframe)
        Dictionnary {name of the dataframe : pandas dataframe}
    branch          : str
        branch (for instance: ``'B0_M'``), which should be in the dataframe(s)
    cut_BDT         : float or str
        ``BDT > cut_BDT`` cut. Used in the name of saved figure.
    **kwargs        : dict
        arguments passed in :py:func:`plot_hist` (except ``branch``, ``latex_branch`` and ``unit``)

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure of the plot (only if ``ax`` is not specified)
    ax : matplotlib.figure.Axes
        Axis of the plot (only if ``ax`` is not specified)
    """
    latex_branch, unit = pt.get_latex_branches_units(branch)
    # Concatenate the dataset names into a single string (used in file names).
    data_names = string.list_into_string(list(dfs.keys()))

    # Make sure the keys used below exist in kwargs (defaulted if absent).
    add_in_dic('fig_name', kwargs)
    add_in_dic('title', kwargs)

    # Figure name and title reflect the BDT cut (if any).
    kwargs['fig_name'] = pt._get_fig_name_given_BDT_cut(
        fig_name=kwargs['fig_name'],
        cut_BDT=cut_BDT,
        branch=branch,
        data_name=data_names)
    kwargs['title'] = pt._get_title_given_BDT_cut(title=kwargs['title'],
                                                  cut_BDT=cut_BDT)

    # Name of the folder = list of the names of the data
    pt._set_folder_name_from_data_name(kwargs, data_names)

    return plot_hist(dfs, branch, latex_branch, unit, **kwargs)
Exemple #4
0
def _set_folder_name_from_data_name(kwargs, data_names):
    """ Fill in the ``"folder_name"`` entry of ``kwargs`` in place with the
    dataset name(s), unless a folder name was already provided.

    Parameters
    ----------
    kwargs: dict
        keyword-argument dictionary whose ``"folder_name"`` key is set
    data_names : str or list(str)
        name of the dataset(s)
    """
    add_in_dic('folder_name', kwargs)
    # An explicitly provided folder name takes precedence — leave it alone.
    if kwargs['folder_name'] is not None:
        return
    if isinstance(data_names, str):
        kwargs['folder_name'] = data_names
    else:
        # Several datasets: merge their names into one string.
        kwargs['folder_name'] = string.list_into_string(data_names)
Exemple #5
0
def define_zparams(initial_values, cut_BDT=None, num=None):
    """Build zfit parameters from the ``initial_values`` dictionary.

    Parameters
    ----------
    initial_values : dict
        ``{"name_variable": {"value":, "low":, "high":, "floating":}}``
    cut_BDT        : float
        performed cut on the BDT (``BDT > cut_BDT``); appended to the zfit
        parameter name as ``"|BDT{cut_BDT}"``
    num            : integer
        Index of the fit; appends ``";{num}"`` to the zfit parameter name.
        The other helper functions ignore this suffix, so it exists mainly
        to allow a parameter to be defined several times (when tuning
        values to make the fit converge)

    Returns
    -------
    zparams        : dict[str, zfit.Parameter]
        Dictionary of zfit Parameters keyed by the variable names of
        ``initial_values`` (the suffixes above go into the zfit parameter
        *name* only, not into the dictionary key)
    """
    zparams = {}
    for var, init in initial_values.items():
        # Compose the (unique) zfit parameter name.
        param_name = var if cut_BDT is None else f"{var}|BDT{cut_BDT}"
        if num is not None:
            param_name = f"{param_name};{num}"

        # Fill missing specification keys with their defaults (in place).
        add_in_dic('value', init, default=None)
        add_in_dic('low', init, default=None)
        add_in_dic('high', init, default=None)
        add_in_dic('floating', init, default=True)

        zparams[var] = zfit.Parameter(param_name,
                                      init['value'],
                                      init['low'],
                                      init['high'],
                                      floating=init['floating'])

    return zparams
Exemple #6
0
def set_text_LHCb(ax,
                  text=default_project['text_plot'],
                  fontsize=default_fontsize['text'],
                  pos=None):
    """ Put a text on a plot

    Parameters
    ----------
    ax       : matplotlib.axes.Axes
        axis where to plot
    text     : str
        text to plot
    fontsize : float
        fontsize of the text
    pos      : dict, list or str
        Three possibilities

        - dictionnary with these keys

            - ``'x'``: position of the text along the x-axis
            - ``'y'``: position of the text along the y-axis
            - ``'ha'``: horizontal alignment
            - ``fontsize``: fontsize of the text
            - ``text`` : text to plot

        - list: ``[x, y, ha]``

        - str: alignment ``'left'`` or ``'right'``.

            - if 'left', ``x = 0.02`` and ``y = 0.95``
            - if 'right', ``x = 0.98`` and ``y = 0.95``.

        These values are also the default values for the dictionnary input mode.
        These parameters are passed to ``ax.text()``.

    Returns
    -------
    matplotlib.text.Text or None
        the text element that ``ax.text`` returns, or ``None`` when ``pos``
        is ``None`` (nothing is plotted in that case — kept from the
        original behaviour)

    Raises
    ------
    ValueError
        if ``pos`` (or its ``'ha'`` entry) is not ``'left'`` or ``'right'``
    TypeError
        if ``pos`` is neither a dict, a str nor a list
    """
    if pos is None:
        # NOTE(review): with no position the original silently plotted
        # nothing and returned None — behaviour preserved; confirm intended.
        return None

    # Work on a copy so the caller's dict is never mutated (the original
    # made this copy but then mutated `pos` anyway — fixed here).
    info = deepcopy(pos)
    if isinstance(info, dict):
        ha = info['ha']
        if ha == 'left':
            x = info.get('x', 0.02)
            y = info.get('y', 0.95)
        elif ha == 'right':
            x = info.get('x', 0.98)
            y = info.get('y', 0.95)
        else:
            raise ValueError(f"'ha' must be 'left' or 'right', got {ha!r}")

        add_in_dic('fontsize', info, fontsize)
        add_in_dic('text', info, text)
        fontsize = info['fontsize']
        text = info['text']

    elif isinstance(info, str):
        # BUG FIX: in the original, ``ha = 'right'`` sat outside the elif,
        # so a 'left' position was always rendered right-aligned.
        if info == 'left':
            x, y, ha = 0.02, 0.95, 'left'
        elif info == 'right':
            x, y, ha = 0.98, 0.95, 'right'
        else:
            raise ValueError(f"pos must be 'left' or 'right', got {info!r}")
    elif isinstance(info, list):
        x, y, ha = info[0], info[1], info[2]
    else:
        raise TypeError(f"pos must be a dict, str or list, got {type(pos)}")

    return ax.text(x,
                   y,
                   text,
                   verticalalignment='top',
                   horizontalalignment=ha,
                   transform=ax.transAxes,
                   fontsize=fontsize)
Exemple #7
0
def BDT(X_train, y_train, classifier='adaboost', **hyperparams):
    """ Train the BDT and return the result

    Parameters
    ----------
    X_train         : numpy ndarray
        array with signal and background concatenated,
        The columns of X correspond to the variable the BDT will be trained with
    y_train         : numpy array
        array with 1 if the concatened event is signal, 0 if it is background
    classifier      : str
        Used classifier

        * ``'adaboost'``
        * ``'gradientboosting'``
        * ``'xgboost'`` (experimental)
    **hyperparams   : dict
        used hyperparameters.
        Default:

        * ``n_estimators = 800``
        * ``learning_rate = 0.1``

    Returns
    -------
    xgb.XGBClassifier
        trained XGboost classifier, if ``classifier == 'xgboost'``
    sklearn.ensemble.AdaBoostClassifier
        trained adaboost classifier, if ``classifier == 'adaboost'``
    sklearn.ensemble.GradientBoostingClassifier
        trained gradient boosting classifier, if ``classifier == 'gradientboosting'``

    Raises
    ------
    ValueError
        if ``classifier`` is not one of the three supported names
    """
    # Balance signal/background by reweighting events.
    weights = compute_sample_weight(class_weight='balanced', y=y_train)

    # NOTE: ``**hyperparams`` is always a dict, so the original
    # ``if hyperparams is None`` check was dead code and was removed.
    add_in_dic('n_estimators', hyperparams, 800)
    # Learning rate shrinks the contribution of each tree by alpha
    add_in_dic('learning_rate', hyperparams, 0.1)
    show_dictionnary(hyperparams, "hyperparameters")

    # Define the BDT
    if classifier == 'adaboost':
        dt = DecisionTreeClassifier(max_depth=3, min_samples_leaf=0.05)
        # The minimum number of samples required to be at a leaf node
        # here, since it's a float, it is expressed in fraction of len(X_train)
        # We need min_samples_leaf samples before deciding to create a new leaf
        # NOTE(review): ``verbose=1`` was removed — AdaBoostClassifier does
        # not accept a ``verbose`` argument in scikit-learn; confirm against
        # the project's pinned sklearn version.
        bdt = AdaBoostClassifier(dt,
                                 algorithm='SAMME',
                                 **hyperparams)

    elif classifier == 'gradientboosting':
        bdt = GradientBoostingClassifier(max_depth=1,
                                         min_samples_split=2,
                                         verbose=1,
                                         random_state=15,
                                         **hyperparams)

    elif classifier == 'xgboost':  # experimental
        import xgboost as xgb
        # Pass the (defaulted) hyperparameters instead of hard-coding
        # learning_rate, so this branch honours the documented defaults
        # like the other two.
        bdt = xgb.XGBClassifier(objective="binary:logistic",
                                random_state=15,
                                verbose=1,
                                **hyperparams)
    else:
        # Previously an unknown name fell through to an UnboundLocalError.
        raise ValueError(
            f"classifier must be 'adaboost', 'gradientboosting' "
            f"or 'xgboost', got {classifier!r}")

    ## Learning (fit)
    bdt.fit(X_train, y_train, sample_weight=weights)

    return bdt