def Plot(self, name_col1=7, name_col2=8, house_class=None):
    """
    Draw a scatter plot of two numeric columns, coloured by Hogwarts house.

    The figure is saved to ``datasets/scatter_plot.png`` and then shown.
    On invalid input an error message is printed and nothing is drawn.
    ``self.size`` is lowered to the number of data rows when it exceeds it.

    :param name_col1: column plotted on the x axis, given either as an
        index (int) or as a header name (str).
    :param name_col2: column plotted on the y axis, given either as an
        index (int) or as a header name (str).
    :param house_class: iterable of house labels to draw. ``None``, an empty
        value, or a selection that matches no label of the dataset means
        "draw every house found in the dataset".
    :return: None
    """
    ds = DataSet(self.file_name)
    header = ds.dataset[0]

    # First pass: translate header names into column indices.
    columns = []
    for column in (name_col1, name_col2):
        if isinstance(column, str):
            if column not in header:
                print('Error: bad name column')
                return
            column = header.index(column)
        columns.append(column)

    # Second pass: both indices must exist and point at numeric data.
    for column in columns:
        if column < 0 or column >= len(header):
            print("Error: This isn't column")
            return
        if not ds.isNumeric_columns(column):
            print("Error: Input column must is numerics")
            return

    # The per-house sample size cannot exceed the number of data rows
    # (row 0 of the dataset is the header).
    row_count = len(ds.dataset) - 1
    if self.size > row_count:
        self.size = row_count

    x_values = ds.get_float_col(columns[0])
    y_values = ds.get_float_col(columns[1])
    color = {
        'Ravenclaw': 'b',
        'Gryffindor': 'r',
        'Slytherin': 'g',
        'Hufflepuff': 'yellow'
    }

    # Keep only the requested houses that exist in the dataset; fall back to
    # every house when nothing usable was requested.
    known_houses = set(ds.get_col(self.y_col))
    selected = [house for house in (house_class or ()) if house in known_houses]
    if not selected:
        selected = known_houses

    xs_by_house = {house: [] for house in selected}
    ys_by_house = {house: [] for house in selected}
    for row, x, y in zip(ds.dataset[1:], x_values, y_values):
        house = row[self.y_col]
        if house in xs_by_house:
            xs_by_house[house].append(x)
            ys_by_house[house].append(y)

    for house, xs in xs_by_house.items():
        plot.scatter(xs[:self.size], ys_by_house[house][:self.size],
                     c=color[house], alpha=0.5, label=house)
    if self.legend:
        plot.legend(loc='upper right')
    plot.ylabel(header[columns[1]])
    plot.xlabel(header[columns[0]])
    plot.title('Scatter Plot')
    plot.savefig('datasets/scatter_plot.png')
    plot.show()
def Plot(self):
    """
    Draw a pair plot of the dataset's numeric columns, coloured by house.

    A square grid of ``self.max_nb_columns`` x ``self.max_nb_columns`` axes
    is built (the attribute is lowered to the number of numeric columns when
    it exceeds it). Diagonal cells hold a per-house histogram of one column;
    cell (i, j) off the diagonal holds a scatter of column j (x axis) against
    column i (y axis), matching the labels on the top row and left column.
    At most the first ``self.size`` values of each column are used.

    The figure is saved to ``datasets/pair_plot.png`` and then shown.

    :return: None
    """
    ds = DataSet(self.file_name)
    ds.find_numeric_label()
    if self.max_nb_columns > len(ds.numeric_columns):
        self.max_nb_columns = len(ds.numeric_columns)
    N = self.max_nb_columns
    if N < 1:
        # An empty grid cannot be created; report it like the other errors.
        print('Error: no numeric columns to plot')
        return

    color = {
        'Ravenclaw': 'b',
        'Gryffindor': 'r',
        'Slytherin': 'g',
        'Hufflepuff': 'yellow'
    }
    # squeeze=False keeps `ax` two-dimensional even for a 1x1 grid, so the
    # ax[i, j] indexing below is always valid.
    fig, ax = plot.subplots(N, N, figsize=self.fig_size, squeeze=False)
    fig.tight_layout()

    header = ds.dataset[0]
    houses = set(ds.get_col(self.y_col))
    # House label of every data row (row 0 of the dataset is the header).
    row_houses = [row[self.y_col] for row in ds.dataset[1:]]

    # Extract and group every column once instead of once per grid cell.
    column_ids = [ds.numeric_columns[i] for i in range(N)]
    samples = []
    grouped = []
    for column in column_ids:
        values = ds.get_float_col(column)[:self.size]
        by_house = {house: [] for house in houses}
        # zip stops at the end of the truncated column, so exactly the
        # sampled values are distributed among the houses.
        for house, value in zip(row_houses, values):
            by_house[house].append(value)
        samples.append(values)
        grouped.append(by_house)

    for i in range(N):
        for j in range(N):
            cell = ax[i, j]
            if i == 0:
                cell.xaxis.set_label_position('top')
                cell.set_xlabel(header[column_ids[j]], rotation=0)
            if j == 0:
                cell.set_ylabel(header[column_ids[i]], rotation=0)
            if i == j:
                statistic = Math_calculat(samples[i])
                # Quartile(0) / Quartile(1) presumably give the min and max
                # of the sample -- confirm against Math_calculat.
                bins = np.linspace(statistic.Quartile(0), statistic.Quartile(1))
                for house in houses:
                    cell.hist(grouped[i][house], bins, facecolor=color[house],
                              alpha=0.5, label=house)
            else:
                for house in houses:
                    cell.scatter(grouped[j][house], grouped[i][house],
                                 c=color[house], alpha=0.5, label=house)
            # Tick labels are hidden on every cell of the grid.
            cell.tick_params(labelbottom=False, labelleft=False)

    if self.legend:
        plot.legend(loc='lower right')
    plot.savefig('datasets/pair_plot.png')
    plot.show()
def Plot(self, col_nb):
    """
    Draw overlaid per-house histograms of one numeric column.

    The figure is saved to ``datasets/histogram.png`` and then shown.
    On invalid input an error message is printed and nothing is drawn.
    ``self.size`` is used as the number of bin edges.

    :param col_nb: column to plot, given either as an index (int) or as a
        header name (str).
    :return: None
    """
    ds = DataSet(self.file_name)
    header = ds.dataset[0]
    if isinstance(col_nb, str):
        if col_nb not in header:
            print('Error with name column')
            return
        col_nb = header.index(col_nb)
    # Reject indices that do not address any column instead of letting the
    # lookup below fail with an IndexError.
    if not -len(header) <= col_nb < len(header):
        print("Error: This isn't column")
        return
    if not ds.isNumeric_columns(col_nb):
        print("Input column must is numerics")
        return

    col = ds.get_float_col(col_nb)
    statistic = Math_calculat(col)
    # Quartile(0) / Quartile(1) presumably give the min and max of the
    # column -- confirm against Math_calculat.
    bins = np.linspace(statistic.Quartile(0), statistic.Quartile(1), self.size)
    color = {
        'Ravenclaw': 'b',
        'Gryffindor': 'r',
        'Slytherin': 'g',
        'Hufflepuff': 'yellow'
    }

    # Group the column values by the house of their row (row 0 of the
    # dataset is the header, hence the [1:]).
    feature = {house: [] for house in set(ds.get_col(self.y_col))}
    for row, value in zip(ds.dataset[1:], col):
        feature[row[self.y_col]].append(value)

    for house, values in feature.items():
        plot.hist(values, bins, facecolor=color[house], alpha=0.5, label=house)
    if self.legend:
        plot.legend(loc='upper right')
    plot.ylabel('Frequency')
    plot.xlabel('Value')
    plot.title('Histogram')
    plot.savefig('datasets/histogram.png')
    plot.show()