def plot_hist2d_auto(df, branches, **kwargs):
    """ Retrieve the LaTeX names of the branches and their units.
    Then, plot a 2D histogram with :py:func:`plot_hist2d`.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe that contains the branches
    branches : [str, str]
        names of the two branches
    **kwargs : dict
        arguments passed to :py:func:`plot_hist2d` (except ``branches``,
        ``latex_branches`` and ``units``)

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure of the plot (only if ``ax`` is not specified)
    ax : matplotlib.axes.Axes
        Axis of the plot (only if ``ax`` is not specified)
    """
    latex_branches, units = pt.get_latex_branches_units(branches)
    add_in_dic('data_name', kwargs)
    pt._set_folder_name_from_data_name(kwargs, kwargs['data_name'])
    return plot_hist2d(df, branches,
                       latex_branches=latex_branches, units=units,
                       **kwargs)
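
# Usage sketch (hypothetical dataframe and branch names; only ``data_name``
# is taken from the kwargs handled above):
#
#     fig, ax = plot_hist2d_auto(df, ['B0_M', 'tau_M'], data_name='MC')
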
def plot_hist_fit_auto(df, branch, cut_BDT=None, **kwargs):
    """ Retrieve the LaTeX name of the branch and its unit.
    Set the folder name to the name of the dataset.
    Then, plot the histogram together with the fitted curve(s)
    with :py:func:`plot_hist_fit`.

    Parameters
    ----------
    df : pandas.DataFrame
        dataframe that contains the branch to plot
    branch : str
        branch (for instance: ``'B0_M'``), in the dataframe
    cut_BDT : float or str
        ``BDT > cut_BDT`` cut. Used in the name of the saved figure.
    **kwargs : dict
        arguments passed to :py:func:`plot_hist_fit` (except ``branch``,
        ``latex_branch`` and ``unit``)

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure of the plot (only if ``axis_mode`` is ``False``)
    ax[0] : matplotlib.axes.Axes
        Axis of the histogram + fitted curves + table
    ax[1] : matplotlib.axes.Axes
        Axis of the pull diagram (only if ``plot_pull`` is ``True``)
    """
    # Retrieve the LaTeX name of the branch and its unit
    latex_branch, unit = pt.get_latex_branches_units(branch)

    # Title and name of the saved file, with the BDT cut
    add_in_dic('fig_name', kwargs)
    add_in_dic('title', kwargs)
    add_in_dic('data_name', kwargs)

    kwargs['fig_name'] = pt._get_fig_name_given_BDT_cut(
        fig_name=kwargs['fig_name'], cut_BDT=cut_BDT, branch=branch,
        data_name=string.add_text(kwargs['data_name'], 'fit', '_', None))

    kwargs['title'] = pt._get_title_given_BDT_cut(
        title=kwargs['title'], cut_BDT=cut_BDT)

    # Name of the folder = name of the data
    add_in_dic('folder_name', kwargs)
    if kwargs['folder_name'] is None and kwargs['data_name'] is not None:
        kwargs['folder_name'] = kwargs['data_name']

    return plot_hist_fit(df, branch, latex_branch=latex_branch, unit=unit,
                         **kwargs)
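
# Usage sketch (hypothetical dataset name; the figure name and title are
# derived from ``branch``, ``cut_BDT`` and ``data_name`` as shown above):
#
#     fig, axes = plot_hist_fit_auto(df, 'B0_M', cut_BDT=0.5, data_name='MC')
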
def plot_hist_auto(dfs, branch, cut_BDT=None, **kwargs):
    """ Retrieve the LaTeX name of the branch and its unit.
    Then, plot the histogram with :py:func:`plot_hist`.

    Parameters
    ----------
    dfs : dict(str:pandas.DataFrame)
        Dictionary {name of the dataframe : pandas dataframe}
    branch : str
        branch (for instance: ``'B0_M'``), which should be
        in the dataframe(s)
    cut_BDT : float or str
        ``BDT > cut_BDT`` cut. Used in the name of the saved figure.
    **kwargs : dict
        arguments passed to :py:func:`plot_hist` (except ``branch``,
        ``latex_branch`` and ``unit``)

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure of the plot (only if ``ax`` is not specified)
    ax : matplotlib.axes.Axes
        Axis of the plot (only if ``ax`` is not specified)
    """
    # Retrieve the LaTeX name of the branch and its unit
    latex_branch, unit = pt.get_latex_branches_units(branch)

    data_names = string.list_into_string(list(dfs.keys()))

    add_in_dic('fig_name', kwargs)
    add_in_dic('title', kwargs)
    kwargs['fig_name'] = pt._get_fig_name_given_BDT_cut(
        fig_name=kwargs['fig_name'], cut_BDT=cut_BDT, branch=branch,
        data_name=data_names)
    kwargs['title'] = pt._get_title_given_BDT_cut(
        title=kwargs['title'], cut_BDT=cut_BDT)

    # Name of the folder = list of the names of the data
    pt._set_folder_name_from_data_name(kwargs, data_names)

    return plot_hist(dfs, branch, latex_branch, unit, **kwargs)
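
# Usage sketch (hypothetical dataframes keyed by dataset name):
#
#     fig, ax = plot_hist_auto({'MC': df_MC, 'data': df_data}, 'B0_M',
#                              cut_BDT=0.2)
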
def _set_folder_name_from_data_name(kwargs, data_names):
    """ Set the ``'folder_name'`` key of a dictionary to the (joined list of)
    data names, in place, if it has not been specified yet.

    Parameters
    ----------
    kwargs : dict
        dictionary with the key ``'folder_name'``
    data_names : str or list(str)
        name of the dataset(s)
    """
    add_in_dic('folder_name', kwargs)
    if kwargs['folder_name'] is None:
        if isinstance(data_names, str):
            str_data_names = data_names
        else:
            str_data_names = string.list_into_string(data_names)
        kwargs['folder_name'] = str_data_names
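
# In-place behaviour, sketched:
#
#     kwargs = {}
#     _set_folder_name_from_data_name(kwargs, ['MC', 'data'])
#     # kwargs['folder_name'] now holds the names joined
#     # by string.list_into_string
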
def define_zparams(initial_values, cut_BDT=None, num=None):
    """ Define zfit parameters from the dictionary ``initial_values``.

    Parameters
    ----------
    initial_values : dict
        ``{"name_variable": {"value": ..., "low": ..., "high": ...,
        "floating": ...}}``
    cut_BDT : float
        performed cut on the BDT (``BDT > cut_BDT``)
    num : int
        Index of the fit. If specified, ``";{num}"`` is appended to the
        name of the variable; the other functions I wrote know to ignore
        this suffix. This is mainly used to define a parameter several
        times (when tuning the initial values to make the fit converge).

    Returns
    -------
    zparams : dict[str, zfit.Parameter]
        Dictionary of zfit Parameters, whose keys are the names of the
        variables. The name of each zfit Parameter is

        * the name of the variable, if ``cut_BDT`` is None
        * ``"{name_variable}|BDT{cut_BDT}"`` otherwise
    """
    zparams = {}
    for var in initial_values.keys():
        if cut_BDT is not None:
            name_var = f"{var}|BDT{cut_BDT}"
        else:
            name_var = var
        if num is not None:
            name_var += f';{num}'

        init = initial_values[var]
        add_in_dic('value', init, default=None)
        add_in_dic('low', init, default=None)
        add_in_dic('high', init, default=None)
        add_in_dic('floating', init, default=True)
        zparams[var] = zfit.Parameter(name_var, init['value'],
                                      init['low'], init['high'],
                                      floating=init['floating'])

    return zparams
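
# Usage sketch (illustrative Gaussian fit parameters):
#
#     initial_values = {
#         'mu':    {'value': 5279., 'low': 5250., 'high': 5310.},
#         'sigma': {'value': 20.,   'low': 1.,    'high': 50.},
#     }
#     zparams = define_zparams(initial_values, cut_BDT=0.5)
#     # zparams['mu'] is a zfit.Parameter named 'mu|BDT0.5'
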
def set_text_LHCb(ax, text=default_project['text_plot'],
                  fontsize=default_fontsize['text'], pos=None):
    """ Put a text on a plot.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        axis where to plot
    text : str
        text to plot
    fontsize : float
        fontsize of the text
    pos : dict, list or str
        Three possibilities

        * dictionary with these keys

          * ``'x'``: position of the text along the x-axis
          * ``'y'``: position of the text along the y-axis
          * ``'ha'``: horizontal alignment
          * ``'fontsize'``: fontsize of the text
          * ``'text'``: text to plot

        * list: ``[x, y, ha]``
        * str: alignment ``'left'`` or ``'right'``

          * if ``'left'``, ``x = 0.02`` and ``y = 0.95``
          * if ``'right'``, ``x = 0.98`` and ``y = 0.95``

          These values are also the default values for the dictionary
          input mode.

        These parameters are passed to ``ax.text()``.

    Returns
    -------
    matplotlib.text.Text
        the text element that ``ax.text`` returns
    """
    # Default position: top-left corner of the axis
    x, y, ha = 0.02, 0.95, 'left'

    if pos is not None:
        info = deepcopy(pos)  # do not mutate the caller's ``pos``
        if isinstance(info, dict):
            ha = info.get('ha', 'left')
            x = info.get('x', 0.98 if ha == 'right' else 0.02)
            y = info.get('y', 0.95)
            fontsize = info.get('fontsize', fontsize)
            text = info.get('text', text)
        elif isinstance(info, str):
            ha = info
            x = 0.98 if ha == 'right' else 0.02
            y = 0.95
        elif isinstance(info, list):
            x, y, ha = info

    return ax.text(x, y, text, verticalalignment='top',
                   horizontalalignment=ha, transform=ax.transAxes,
                   fontsize=fontsize)
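
# Usage sketch (text contents are illustrative):
#
#     fig, ax = plt.subplots()
#     set_text_LHCb(ax, text='LHCb preliminary', pos='right')
#     # or, with full control through a dictionary:
#     set_text_LHCb(ax, pos={'x': 0.5, 'y': 0.9, 'ha': 'left',
#                            'text': 'LHCb unofficial', 'fontsize': 20})
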
def BDT(X_train, y_train, classifier='adaboost', **hyperparams):
    """ Train the BDT and return the result.

    Parameters
    ----------
    X_train : numpy.ndarray
        array with signal and background events concatenated. The columns
        of ``X_train`` correspond to the variables the BDT is trained with.
    y_train : numpy.array
        array with 1 if the concatenated event is signal,
        0 if it is background
    classifier : str
        Used classifier

        * ``'adaboost'``
        * ``'gradientboosting'``
        * ``'xgboost'`` (experimental)
    **hyperparams : dict
        used hyperparameters. Default:

        * ``n_estimators = 800``
        * ``learning_rate = 0.1``

    Returns
    -------
    sklearn.ensemble.AdaBoostClassifier
        trained adaboost classifier, if ``classifier == 'adaboost'``
    sklearn.ensemble.GradientBoostingClassifier
        trained gradient boosting classifier,
        if ``classifier == 'gradientboosting'``
    xgb.XGBClassifier
        trained XGBoost classifier, if ``classifier == 'xgboost'``
    """
    weights = compute_sample_weight(class_weight='balanced', y=y_train)

    add_in_dic('n_estimators', hyperparams, 800)
    # The learning rate shrinks the contribution of each tree
    add_in_dic('learning_rate', hyperparams, 0.1)
    show_dictionnary(hyperparams, "hyperparameters")

    # Define the BDT
    if classifier == 'adaboost':
        # min_samples_leaf is a float here, so it is expressed as a fraction
        # of len(X_train): the minimum number of samples required at a leaf
        # node before deciding to create a new leaf
        dt = DecisionTreeClassifier(max_depth=3, min_samples_leaf=0.05)
        bdt = AdaBoostClassifier(dt, algorithm='SAMME', **hyperparams)

    elif classifier == 'gradientboosting':
        bdt = GradientBoostingClassifier(max_depth=1, min_samples_split=2,
                                         verbose=1, random_state=15,
                                         **hyperparams)

    elif classifier == 'xgboost':  # experimental
        import xgboost as xgb
        bdt = xgb.XGBClassifier(objective="binary:logistic",
                                random_state=15, **hyperparams)

    # Learning (fit)
    bdt.fit(X_train, y_train, sample_weight=weights)
    return bdt
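
# Usage sketch (toy data with 3 hypothetical training variables):
#
#     import numpy as np
#     X_sig = np.random.normal(1., 1., size=(1000, 3))
#     X_bkg = np.random.normal(0., 1., size=(1000, 3))
#     X_train = np.concatenate([X_sig, X_bkg])
#     y_train = np.concatenate([np.ones(1000), np.zeros(1000)])
#     bdt = BDT(X_train, y_train, classifier='adaboost', n_estimators=200)
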