def plot_forest_features(self, drop: bool = True, norm: bool = False, thresholds: list = None, idx: int = -1):
    """
    Draw random-forest accuracy charts, one subplot row per threshold.

    Every row contains two charts, one for each entry of the local
    ``max_features`` list ('sqrt' and 'log2'), plotted against a fixed list
    of estimator counts.

    :param drop: forwarded unchanged to ``self.compute_forest_features``.
    :param norm: forwarded unchanged to ``self.compute_forest_features``.
    :param thresholds: thresholds to plot; sorted ascending before use.
        Defaults to ``[1]`` when omitted.
    :param idx: forwarded unchanged to ``self.compute_forest_features``.
    :return: --- <class 'NoneType'>
    """
    cutoffs = [1] if thresholds is None else sorted(thresholds)
    feature_modes = ['sqrt', 'log2']
    estimator_counts = [5, 10, 25, 50, 75, 100, 150, 200, 250, 300]

    # squeeze=False keeps axs two-dimensional even for a single threshold.
    _, axs = plt.subplots(len(cutoffs), 2, figsize=(10, 4), squeeze=False)

    for row, cutoff in enumerate(cutoffs):
        # One computation per threshold; its k-th entry is drawn in the k-th
        # column (presumably matching the order of feature_modes -- confirm
        # against compute_forest_features).
        results = self.compute_forest_features(drop, norm, cutoff, idx)
        for col, mode in enumerate(feature_modes):
            chart_title = ('Random Forests with %s features' % mode) + '[T=' + str(cutoff) + ']'
            layout = pF.LayoutStyleObject(
                title=chart_title,
                xlabel='Number of estimators',
                ylabel='Accuracy',
                grid=True)
            pF.multiple_line_chart(axs[row, col], estimator_counts, results[col],
                                   layout=layout, percentage=True)
    plt.show()
def plot_histogram(self, col_index: list = None, n_graphs: int = 5):
    """
    Draw one histogram per selected column of ``self.data`` on a subplot grid.

    The grid shape comes from ``pF.choose_grid`` and the cells are filled
    row by row. The call is timed: ``self.process_timer`` is set on entry
    and ``self.get_timer`` is invoked after the figure has been shown.

    :param col_index: positional column indices to plot; defaults to
        ``[2, 3, 4, 5, 6]`` when omitted.
    :param n_graphs: passed to ``pF.choose_grid`` and also used as the
        per-cell figure size factor.
    :return: --- <class 'NoneType'>
    """
    self.process_timer = time.time()

    if col_index is None:
        col_index = [2, 3, 4, 5, 6]
    selected = (self.data.iloc[:, col_index]).columns

    rows, cols = pF.choose_grid(len(selected) - 1, n_graphs)
    _, axs = plt.subplots(rows, cols,
                          figsize=(cols * n_graphs, rows * n_graphs),
                          squeeze=False)

    for position, column in enumerate(selected):
        # Row-major placement: a new row starts every `cols` plots.
        grid_row, grid_col = divmod(position, cols)
        layout = pF.LayoutStyleObject(title='Histogram for %s' % column,
                                      xlabel='', ylabel='', grid=True)
        pF.histogram(axs[grid_row, grid_col], self.data[column], layout=layout)

    plt.show()
    self.get_timer("plot_histogram")
def plot_knn_var_threshold(self, drop: bool = True, norm: bool = False, thresholds: list = None, n_iter: int = 20, parity: bool = False, smote: bool = False):
    """
    Plot the KNN accuracy as a function of n for the given thresholds.

    With ``drop`` enabled, one subplot is drawn per threshold on a grid of
    at most five columns. With ``drop`` disabled, a single chart is drawn.

    :param drop: selects the per-threshold grid (True) or the single chart
        (False); also forwarded to ``self.compute_knn``.
    :param norm: forwarded unchanged to ``self.compute_knn``.
    :param thresholds: thresholds to plot; sorted ascending before use.
        Defaults to 0.81, 0.83, ..., 0.99 when omitted.
    :param n_iter: ``round(n_iter / 2)`` values of n are placed on the
        x axis; also forwarded to ``self.compute_knn``.
    :param parity: when True the n values are even (2, 4, ...), otherwise
        odd (1, 3, ...); also forwarded to ``self.compute_knn``.
    :param smote: forwarded to ``self.compute_knn`` as keyword ``smote``.
    :raises ValueError: if ``drop`` is True and ``thresholds`` is empty.
    :return: --- <class 'NoneType'>
    """
    if thresholds is None:
        thresholds = [
            0.81, 0.83, 0.85, 0.87, 0.89, 0.91, 0.93, 0.95, 0.97, 0.99
        ]
    else:
        thresholds = sorted(thresholds)

    first_n = 2 if parity else 1
    nvalues = [2 * i + first_n for i in range(round(n_iter / 2))]

    if not drop:
        plt.figure()
        # Same layout-object API as every other multiple_line_chart call.
        layout = pF.LayoutStyleObject(title='KNN variants', xlabel='n',
                                      ylabel='accuracy', grid=True)
        # n_iter / parity / smote are forwarded so the series returned by
        # compute_knn is built with the same settings as `nvalues`.
        # NOTE(review): the whole thresholds list is passed here, as in the
        # original code; presumably it is ignored when drop is False -- confirm.
        series = self.compute_knn(drop, norm, thresholds, n_iter, parity,
                                  smote=smote)
        pF.multiple_line_chart(plt.gca(), nvalues, series, layout=layout,
                               percentage=True)
        plt.show()
        return

    count = len(thresholds)
    if count == 0:
        raise ValueError('thresholds must contain at least one value')

    # At most five charts per row, with enough rows for every threshold.
    # The former int(x / 5) computation produced zero rows for fewer than
    # five thresholds and too few axes for counts not divisible by five.
    n_cols = min(count, 5)
    n_rows = (count + n_cols - 1) // n_cols
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 6), squeeze=False)
    fig.subplots_adjust(hspace=.5, wspace=.001)
    axs = axs.ravel()

    for j, threshold in enumerate(thresholds):
        # Placeholder title kept from the original; presumably replaced by
        # the layout title inside multiple_line_chart.
        axs[j].set_title('n')
        layout = pF.LayoutStyleObject(
            title='KNN variants - threshold ' + str(threshold),
            xlabel='n',
            ylabel='accuracy',
            grid=True)
        pF.multiple_line_chart(axs[j], nvalues,
                               self.compute_knn(drop, norm, threshold, n_iter,
                                                parity, smote=smote),
                               layout=layout, percentage=True, legends=True)

    # Hide the unused cells of a partially filled last row.
    for spare in axs[count:]:
        spare.set_visible(False)

    fig.tight_layout()
    plt.show()
def plot_num_var_threshold(self, tmin: float = 0, tmax: float = 1, step: float = 0.01):
    """
    Plot the number of variables against the drop threshold.

    ``self.compute_num_var_threshold`` is evaluated for ``idx`` 0 through 7
    and each result is drawn as a line on the same axes. The index being
    processed is printed before each computation.

    :param tmin: forwarded as ``tmin`` to ``self.compute_num_var_threshold``.
    :param tmax: forwarded as ``tmax`` to ``self.compute_num_var_threshold``.
    :param step: forwarded as ``step`` to ``self.compute_num_var_threshold``.
    :return: --- <class 'NoneType'>
    """
    plt.figure(figsize=(7, 5))

    for index in range(8):
        print(index)
        result = self.compute_num_var_threshold(tmin=tmin, tmax=tmax,
                                                step=step, idx=index)
        # result[0] is used as x data and result[1] as y data.
        x_data, y_data = result[0], result[1]

        axes = plt.gca()
        layout = pF.LayoutStyleObject(
            title='Number of Variables as a function of the Threshold',
            xlabel='Threshold',
            ylabel='Number of Variables',
            grid=True)
        style = pF.PlotStyleObject(color='Blue', marker='o', alpha=0.5)
        pF.single_line_chart(axes, x_data, y_data, layout=layout,
                             plotstyle=style)

    plt.show()
def plot_tree_criteria(self, drop: bool = True, norm: bool = False, thresholds: list = None, idx: int = -1):
    """
    Plot decision-tree accuracy for the 'entropy' and 'gini' criteria.

    One subplot row is drawn per threshold, with one chart per criterion.
    The x axis carries the local ``min_samples_leaf`` values.

    :param drop: forwarded unchanged to ``self.compute_tree_criteria``.
    :param norm: forwarded unchanged to ``self.compute_tree_criteria``.
    :param thresholds: thresholds to plot; sorted ascending before use.
        Defaults to ``[1]`` when omitted.
    :param idx: forwarded unchanged to ``self.compute_tree_criteria``.
    :return: --- <class 'NoneType'>
    """
    thresholds = [1] if thresholds is None else sorted(thresholds)
    criteria = ['entropy', 'gini']
    min_samples_leaf = [.10]

    # squeeze=False keeps axs two-dimensional even for a single threshold.
    fig, axs = plt.subplots(len(thresholds), 2, figsize=(10, 4), squeeze=False)

    for i, threshold in enumerate(thresholds):
        # Computed once per threshold and shared by both criteria charts,
        # rather than once per chart; the call does not depend on the
        # criterion. The k-th entry is drawn in the k-th column (presumably
        # matching the order of `criteria` -- confirm).
        scores = self.compute_tree_criteria(drop, norm, threshold, idx)
        for k, criterion in enumerate(criteria):
            layout = pF.LayoutStyleObject(
                title='Decision Trees with %s criteria' % criterion +
                '[T=' + str(threshold) + ']',
                # The x data is min_samples_leaf, not a number of estimators.
                xlabel='Minimum samples per leaf',
                ylabel='Accuracy',
                grid=True)
            pF.multiple_line_chart(axs[i, k], min_samples_leaf, scores[k],
                                   layout=layout, percentage=True)
    plt.show()
def draw_plots_none(var_x, var_y, colors):
    """
    Plot one or more ``data`` columns against a common x variable.

    Each column is drawn as unconnected markers (line width 0) on a single
    figure with a legend. ``data`` is not a parameter: it is read from the
    surrounding scope and indexed by column name.

    :param var_x: name of the x column; the special value 'ID' uses each
        y column's index values instead of a column.
    :param var_y: sequence of y column names.
    :param colors: sequence of colours, indexed in parallel with ``var_y``.
    :return: None
    """
    # Axis label / title fragment: the y names joined with '/'.
    y_label = '/'.join(var_y)
    layout = pF.LayoutStyleObject(y_label + ' vs ' + var_x, var_x, y_label, grid=True)

    # One plot style per y column, keyed by its position as a string.
    plot_style = {
        str(pos): pF.PlotStyleObject(color=colors[pos], legend=name,
                                     marker='o', markersize=5, linewidth=0)
        for pos, name in enumerate(var_y)
    }

    # x data per series: the column's own index for 'ID', else the x column.
    use_index = var_x == 'ID'
    xvalues = {}
    for pos, name in enumerate(var_y):
        if use_index:
            xvalues[str(pos)] = data[name].index.values
        else:
            xvalues[str(pos)] = data[var_x].values

    yvalues = {str(pos): data[name].values for pos, name in enumerate(var_y)}

    plt.figure(figsize=(13, 6))
    pF.multiple_plots(plt.gca(), xvalues, yvalues, layout, plot_style, legends=True)
    plt.show()
def plot_naive_bayes_var_threshold(self, drop: bool = True, norm: bool = False, thresholds: list = None, smote: bool = False):
    """
    Plot Naive Bayes accuracy as a grouped bar chart over thresholds.

    ``self.compute_naive_bayes`` is run once per threshold. Element ``[1]``
    of each result supplies one value per variant: two variants by default,
    three when ``norm`` is set.

    :param drop: forwarded unchanged to ``self.compute_naive_bayes``.
    :param norm: forwarded to ``self.compute_naive_bayes``; when truthy a
        third ('Multinomial') series is also plotted.
    :param thresholds: thresholds to plot; sorted ascending before use.
        Defaults to 0.81, 0.83, ..., 0.99 when omitted.
    :param smote: forwarded to ``self.compute_naive_bayes`` as ``smote``.
    :return: --- <class 'NoneType'>
    """
    if thresholds is None:
        thresholds = [
            0.81, 0.83, 0.85, 0.87, 0.89, 0.91, 0.93, 0.95, 0.97, 0.99
        ]
    else:
        thresholds = sorted(thresholds)

    # Series labels; the label order is the order values are read from result[1].
    if norm:
        labels = ['Gaussian', 'Multinomial', 'Bernouly']
    else:
        labels = ['Gaussian', 'Bernouly']
    series = [[] for _ in labels]

    for threshold in thresholds:
        result = self.compute_naive_bayes(drop, norm, threshold, smote=smote)
        for slot, values in enumerate(series):
            values.append(result[1][slot])

    plt.figure()
    layout = pF.LayoutStyleObject(
        title='Naive Bayes accuracy as function of threshold',
        xlabel='Threshold',
        ylabel='Accuracy',
        grid=True)
    pF.multiple_bar_chart(plt.gca(), thresholds, series, labels,
                          layout=layout, legends=True, percentage=True)
    plt.show()