コード例 #1
0
ファイル: dspPlots.py プロジェクト: ToinoMF/datascience
 def plot_num_var_threshold(self,
                            tmin: float = 0,
                            tmax: float = 1,
                            step: float = 0.01):
     """
     Plots the number of variables against the drop threshold, one line per index.
     For every index from 0 to 7, 'compute_num_var_threshold' is called with 'tmin', 'tmax',
     'step' and that index; items 0 and 1 of its result are drawn as a line on the current
     axes of a single 7x5 figure. Each index is printed right before it is processed.
     :param tmin: forwarded to 'compute_num_var_threshold' as 'tmin'.
     :param tmax: forwarded to 'compute_num_var_threshold' as 'tmax'.
     :param step: forwarded to 'compute_num_var_threshold' as 'step'.
     :return: --- <class 'NoneType'>
     """
     plt.figure(figsize=(7, 5))
     for idx in range(8):
         # Progress trace: one line of output per processed index.
         print(idx)
         series = self.compute_num_var_threshold(tmin=tmin, tmax=tmax, step=step, idx=idx)
         # Every line goes onto the same (current) axes, so all eight overlap in one chart.
         axes = plt.gca()
         x_values = series[0]
         y_values = series[1]
         layout = pF.LayoutStyleObject(
             title='Number of Variables as a function of the Threshold',
             xlabel='Threshold',
             ylabel='Number of Variables',
             grid=True)
         style = pF.PlotStyleObject(color='Blue', marker='o', alpha=0.5)
         pF.single_line_chart(axes, x_values, y_values, layout=layout, plotstyle=style)
     plt.show()
コード例 #2
0
ファイル: dspPlots.py プロジェクト: ToinoMF/datascience
 def plot_gradient_boost(self,
                         drop: bool = True,
                         norm: bool = False,
                         threshold: float = 1):
     """
     Draws a single line chart of the Gradient Boost results against the max depths 5, 10, 15 and 20.
     The plotted values are whatever 'compute_gradient_boost' returns for the given arguments.
     :param drop: forwarded to 'compute_gradient_boost' as its first argument.
     :param norm: forwarded to 'compute_gradient_boost' as its second argument.
     :param threshold: forwarded to 'compute_gradient_boost' as its third argument.
     :return: --- <class 'NoneType'>
     """
     # A 16x4 figure is opened first; the chart is then drawn on its current axes.
     plt.subplots(1, 1, figsize=(16, 4), squeeze=False)
     depths = [5, 10, 15, 20]
     # The axes are grabbed before the results are computed, matching the call's argument order.
     axes = plt.gca()
     results = self.compute_gradient_boost(drop, norm, threshold)
     pF.single_line_chart(axes, depths, results,
                          'XGBoost', 'XGBoost', 'Max Depths', 'accuracy',
                          percentage=True)
     plt.show()
コード例 #3
0
ファイル: Boost.py プロジェクト: ToinoMF/datascience
def Boost(drop: bool = True, norm: bool = False, threshold: float = 1):
    """
    Trains one XGBClassifier per max depth in [5, 10, 15, 20] and plots the accuracy of each.

    The data set comes from 'data.compute_data_drop' when 'drop' is True and from
    'data.compute_data_average' otherwise; 'threshold' is forwarded to whichever is called.
    The 'class' column is popped from the set and used as the target, the remaining values
    are the features. The data is split with train_size=0.7, stratified on the target, and
    every classifier is fitted on the training part and scored on the test part.

    :param drop: selects 'compute_data_drop' (True) or 'compute_data_average' (False).
    :param norm: when True the features go through 'preprocessing.normalize' before the split.
    :param threshold: value forwarded to the selected data loader.
    :return: --- <class 'NoneType'>
    """
    loader = data.compute_data_drop if drop else data.compute_data_average
    full_set = loader(threshold)

    y: np.ndarray = full_set.pop('class').values
    X: np.ndarray = preprocessing.normalize(full_set.values) if norm else full_set.values

    # NOTE: no random_state is passed, so the split (and the plotted accuracies) vary between runs.
    trnX, tstX, trnY, tstY = train_test_split(X, y, train_size=0.7, stratify=y)

    max_depths = [5, 10, 15, 20]

    # Open the 16x4 figure up front; the axes are fetched later through plt.gca().
    plt.subplots(1, 1, figsize=(16, 4), squeeze=False)

    values = []
    for d in max_depths:
        boost = XGBClassifier(max_depth=d)
        boost.fit(trnX, trnY)
        prdY = boost.predict(tstX)
        # Each accuracy stays wrapped in its own one-element list: that is the shape this
        # function has always handed to pF.single_line_chart.
        values.append([metrics.accuracy_score(tstY, prdY)])

    pF.single_line_chart(plt.gca(),
                         max_depths,
                         values,
                         'XGBoost',
                         'XGBoost',
                         'Max Depths',
                         'accuracy',
                         percentage=True)
    plt.show()