Ejemplo n.º 1
0
    def Plot(self, name_col1=7, name_col2=8, house_class=None):
        """
        Draw a scatter plot of two numeric columns, coloured by house.

        The figure is saved to 'datasets/scatter_plot.png' and then shown.
        On invalid input an error message is printed and nothing is drawn.
        Side effect: ``self.size`` is clamped to the number of data rows.

        :param name_col1: column for the x axis; either a column index (int)
            or a column name (str) looked up in the header row
        :param name_col2: column for the y axis; same conventions as name_col1
        :param house_class: iterable of house names to plot. Names that do not
            occur in the ``self.y_col`` column are ignored; when it is None,
            empty, or contains no known name, every house is plotted.
        :return: None
        """
        ds = DataSet(self.file_name)
        header = ds.dataset[0]

        # First pass: translate column names into indices.
        columns = []
        for col in (name_col1, name_col2):
            if isinstance(col, str):
                if col not in header:
                    print('Error: bad name column')
                    return
                col = header.index(col)
            columns.append(col)

        # Second pass: both indices must address an existing numeric column.
        for col in columns:
            if col < 0 or col >= len(header):
                print("Error: This isn't column")
                return
            if not ds.isNumeric_columns(col):
                print("Error: Input column must is numerics")
                return

        # The first dataset row is the header, so it is not a data row.
        row_count = len(ds.dataset) - 1
        if self.size > row_count:
            self.size = row_count

        col1 = ds.get_float_col(columns[0])
        col2 = ds.get_float_col(columns[1])
        color = {
            'Ravenclaw': 'b',
            'Gryffindor': 'r',
            'Slytherin': 'g',
            'Hufflepuff': 'yellow'
        }

        # Build the set of known houses once, then keep only the requested
        # names that actually occur; fall back to every house otherwise.
        known_houses = set(ds.get_col(self.y_col))
        requested = [name for name in (house_class or ()) if name in known_houses]
        houses = requested if requested else known_houses

        feature1 = {house: [] for house in houses}
        feature2 = {house: [] for house in houses}
        for idx, row in enumerate(ds.dataset[1:]):
            house = row[self.y_col]
            if house in houses:
                feature1[house].append(col1[idx])
                feature2[house].append(col2[idx])

        # self.size limits the number of points drawn for each house.
        for house in feature1:
            plot.scatter(feature1[house][:self.size], feature2[house][:self.size],
                         c=color[house], alpha=0.5, label=house)
        if self.legend:
            plot.legend(loc='upper right')
        plot.ylabel(header[columns[1]])
        plot.xlabel(header[columns[0]])
        plot.title('Scatter Plot')
        plot.savefig('datasets/scatter_plot.png')
        plot.show()
Ejemplo n.º 2
0
    def Plot(self):
        """
        Draw a pair plot (scatter-plot matrix) of the numeric columns.

        Builds an N x N grid, where N is ``self.max_nb_columns`` clamped to the
        number of entries in ``ds.numeric_columns``. Diagonal cells hold
        per-house histograms of one column; every other cell is a per-house
        scatter plot with the column named above the cell on the x axis and the
        column named left of the row on the y axis. At most ``self.size``
        samples per column are used. The figure is saved to
        'datasets/pair_plot.png' and then shown.

        Side effect: ``self.max_nb_columns`` is overwritten with the clamped
        value.

        :return: None
        """
        ds = DataSet(self.file_name)
        ds.find_numeric_label()
        if self.max_nb_columns > len(ds.numeric_columns):
            self.max_nb_columns = len(ds.numeric_columns)
        N = self.max_nb_columns
        if N < 1:
            # plot.subplots() rejects an empty grid; report it like the other
            # input errors instead of crashing.
            print('Error: no numeric columns to plot')
            return

        color = {
            'Ravenclaw': 'b',
            'Gryffindor': 'r',
            'Slytherin': 'g',
            'Hufflepuff': 'yellow'
        }

        houses = set(ds.get_col(self.y_col))
        # House label of every data row (row 0 is the header).
        labels = [ds.dataset[k][self.y_col] for k in range(1, len(ds.dataset))]

        # Fetch and group each column once instead of once per grid cell.
        columns = []
        grouped = []
        for idx in range(N):
            values = ds.get_float_col(ds.numeric_columns[idx])[:self.size]
            by_house = {house: [] for house in houses}
            # zip stops at the end of the truncated column, so exactly the
            # samples kept by the slice above are grouped (the header row is
            # not counted against self.size).
            for label, value in zip(labels, values):
                by_house[label].append(value)
            columns.append(values)
            grouped.append(by_house)

        # squeeze=False keeps ``ax`` two-dimensional even for a 1 x 1 grid, so
        # ax[i, j] is always valid.
        fig, ax = plot.subplots(N, N, figsize=self.fig_size, squeeze=False)

        fig.tight_layout()
        for i in range(N):
            for j in range(N):
                cell = ax[i, j]
                if i == 0:
                    cell.xaxis.set_label_position('top')
                    cell.set_xlabel(ds.dataset[0][ds.numeric_columns[j]], rotation=0)
                if j == 0:
                    cell.set_ylabel(ds.dataset[0][ds.numeric_columns[i]], rotation=0)
                if i == j:
                    statistic = Math_calculat(columns[i])
                    # Bin edges span Quartile(0)..Quartile(1) (presumably the
                    # column minimum and maximum - confirm in Math_calculat),
                    # using np.linspace's default of 50 points.
                    bins = np.linspace(statistic.Quartile(0), statistic.Quartile(1))
                    for house, values in grouped[i].items():
                        cell.hist(values, bins, facecolor=color[house], alpha=0.5, label=house)
                else:
                    # x follows the grid column (j) and y the grid row (i) so
                    # the data agrees with the axis labels set above.
                    for house in grouped[i]:
                        cell.scatter(grouped[j][house], grouped[i][house],
                                     c=color[house], alpha=0.5, label=house)
                cell.tick_params(labelbottom=False, labelleft=False)

        if self.legend:
            plot.legend(loc='lower right')
        plot.savefig('datasets/pair_plot.png')
        plot.show()
Ejemplo n.º 3
0
    def Plot(self, col_nb):
        """
        Draw overlaid per-house histograms of a single numeric column.

        The bin edges are ``self.size`` evenly spaced points between
        ``Quartile(0)`` and ``Quartile(1)`` of the column. The figure is saved
        to 'datasets/histogram.png' and then shown. On invalid input an error
        message is printed and nothing is drawn.

        :param col_nb: column to plot; a str is looked up by name in the
            header row, any other value is used directly as the column index
        :return: None
        """
        ds = DataSet(self.file_name)

        # Resolve a column name to its position in the header row.
        if type(col_nb) is str:
            header = ds.dataset[0]
            if col_nb not in header:
                print('Error with name column')
                return
            col_nb = header.index(col_nb)

        if not ds.isNumeric_columns(col_nb):
            print("Input column must is numerics")
            return

        values = ds.get_float_col(col_nb)
        stats = Math_calculat(values)
        lower = stats.Quartile(0)
        upper = stats.Quartile(1)
        bin_edges = np.linspace(lower, upper, self.size)

        palette = {
            'Ravenclaw': 'b',
            'Gryffindor': 'r',
            'Slytherin': 'g',
            'Hufflepuff': 'yellow'
        }

        # One bucket per distinct house, filled row by row (row 0 is the
        # header, so data row k pairs with values[k - 1]).
        by_house = {house: [] for house in set(ds.get_col(self.y_col))}
        for idx, row in enumerate(ds.dataset[1:]):
            by_house[row[self.y_col]].append(values[idx])

        for house, samples in by_house.items():
            plot.hist(samples, bin_edges, facecolor=palette[house], alpha=0.5, label=house)

        if self.legend:
            plot.legend(loc='upper right')
        plot.ylabel('Frequency')
        plot.xlabel('Value')
        plot.title('Histogram')
        plot.savefig('datasets/histogram.png')
        plot.show()