Exemple #1
0
 def time_plot_andrews_curves(self):
     """Draw Andrews curves for ``self.df`` with "Name" as the second argument.

     NOTE(review): the ``time_`` prefix suggests a timing benchmark and
     ``andrews_curves`` is presumably ``pandas.plotting.andrews_curves``
     (where the second argument is the class column) -- confirm against
     the enclosing module.
     """
     andrews_curves(self.df, "Name")
Exemple #2
0
    def test_andrews_curves(self):
        """Check ``andrews_curves`` colour handling and its ``data`` keyword.

        The same battery of checks runs on ``self.iris`` and on a
        ten-row random frame with a single class: a default-colour smoke
        test, two explicit palettes, and the ``cm.jet`` colormap.  A
        three-row frame then verifies the legend handle colours, and the
        final call expects a ``FutureWarning`` when the frame is passed
        as ``data=``.
        """
        from pandas.plotting import andrews_curves
        from matplotlib import cm

        def exercise(frame):
            # Smoke test with default colours.
            _check_plot_works(andrews_curves, frame=frame,
                              class_column='Name')

            # Explicit palettes: hex codes first, then named colours.
            # Built here so every frame gets fresh palette objects.
            hex_codes = ('#556270', '#4ECDC4', '#C7F464')
            named = ['dodgerblue', 'aquamarine', 'seagreen']
            for palette in (hex_codes, named):
                axes = _check_plot_works(andrews_curves, frame=frame,
                                         class_column='Name', color=palette)
                self._check_colors(axes.get_lines()[:10],
                                   linecolors=palette,
                                   mapping=frame['Name'][:10])

            # Colormap: one evenly spaced sample per distinct class.
            axes = _check_plot_works(andrews_curves, frame=frame,
                                     class_column='Name', colormap=cm.jet)
            sampled = lmap(cm.jet,
                           np.linspace(0, 1, frame['Name'].nunique()))
            self._check_colors(axes.get_lines()[:10],
                               linecolors=sampled,
                               mapping=frame['Name'][:10])

        exercise(self.iris)

        n_rows = 10
        exercise(DataFrame({"A": random.rand(n_rows),
                            "B": random.rand(n_rows),
                            "C": random.rand(n_rows),
                            "Name": ["A"] * n_rows}))

        # One row per class, with the class labels doubling as colours.
        rgb = ['b', 'g', 'r']
        one_per_class = DataFrame({"A": [1, 2, 3],
                                   "B": [1, 2, 3],
                                   "C": [1, 2, 3],
                                   "Name": rgb})
        axes = andrews_curves(one_per_class, 'Name', color=rgb)
        legend_handles, _ = axes.get_legend_handles_labels()
        self._check_colors(legend_handles, linecolors=rgb)

        with tm.assert_produces_warning(FutureWarning):
            andrews_curves(data=one_per_class, class_column='Name')
Exemple #3
0
    def test_andrews_curves(self, iris):
        """Check ``andrews_curves`` colour handling and its ``data`` keyword.

        The same battery of checks runs on the ``iris`` argument and on
        a ten-row random frame with a single class: a default-colour
        smoke test, two explicit palettes, and the ``cm.jet`` colormap.
        A three-row frame then verifies the legend handle colours, and
        the final call expects a ``FutureWarning`` when the frame is
        passed as ``data=``.
        """
        from pandas.plotting import andrews_curves
        from matplotlib import cm

        def exercise(frame):
            # Smoke test with default colours.
            _check_plot_works(andrews_curves, frame=frame,
                              class_column="Name")

            # Explicit palettes: hex codes first, then named colours.
            # Built here so every frame gets fresh palette objects.
            hex_codes = ("#556270", "#4ECDC4", "#C7F464")
            named = ["dodgerblue", "aquamarine", "seagreen"]
            for palette in (hex_codes, named):
                axes = _check_plot_works(andrews_curves, frame=frame,
                                         class_column="Name", color=palette)
                self._check_colors(axes.get_lines()[:10],
                                   linecolors=palette,
                                   mapping=frame["Name"][:10])

            # Colormap: one evenly spaced sample per distinct class.
            axes = _check_plot_works(andrews_curves, frame=frame,
                                     class_column="Name", colormap=cm.jet)
            sampled = list(
                map(cm.jet, np.linspace(0, 1, frame["Name"].nunique())))
            self._check_colors(axes.get_lines()[:10],
                               linecolors=sampled,
                               mapping=frame["Name"][:10])

        exercise(iris)

        n_rows = 10
        exercise(DataFrame({"A": random.rand(n_rows),
                            "B": random.rand(n_rows),
                            "C": random.rand(n_rows),
                            "Name": ["A"] * n_rows}))

        # One row per class, with the class labels doubling as colours.
        rgb = ["b", "g", "r"]
        one_per_class = DataFrame({"A": [1, 2, 3],
                                   "B": [1, 2, 3],
                                   "C": [1, 2, 3],
                                   "Name": rgb})
        axes = andrews_curves(one_per_class, "Name", color=rgb)
        legend_handles, _ = axes.get_legend_handles_labels()
        self._check_colors(legend_handles, linecolors=rgb)

        with tm.assert_produces_warning(FutureWarning):
            andrews_curves(data=one_per_class, class_column="Name")
# Parallel-coordinates plot on a 15x10 figure; the "Id" column is dropped
# and "Species" is passed as the class column.
plt.figure(figsize=(15, 10))
parallel_coordinates(dataset.drop("Id", axis=1), "Species")
plt.title("Parallel Coordinates Plot", fontsize=20, fontweight="bold")
plt.xlabel("Features", fontsize=15)
plt.ylabel("Features Values", fontsize=15)
# loc=1 places the legend in the upper-right corner.
plt.legend(loc=1,
           prop={"size": 15},
           frameon=True,
           shadow=True,
           facecolor="White",
           edgecolor="black")
plt.show()

## Andrews Curves
# No plt.figure(...) call precedes this plot, so the figsize used above is
# not repeated here.
from pandas.plotting import andrews_curves
andrews_curves(dataset.drop("Id", axis=1), "Species")
plt.title("Andrews Curve Plot", fontsize=20, fontweight="bold")
plt.xlabel("Features", fontsize=15)
plt.ylabel("Features Values", fontsize=15)
# NOTE(review): legend font size is 5 here versus 15 in the plot above --
# confirm the difference is intentional.
plt.legend(loc=1,
           prop={"size": 5},
           frameon=True,
           shadow=True,
           facecolor="White",
           edgecolor="black")
plt.show()

## BoxPlots
# Box plots of the remaining columns, grouped by "Species".
# NOTE(review): DataFrame.boxplot(by=...) without ax= presumably opens its
# own figure, which would leave the plt.figure() below empty -- confirm.
plt.figure()
dataset.drop("Id", axis=1).boxplot(by="Species", figsize=(15, 10))
plt.show()
 def display_andrews_graph(self):
     """Draw Andrews curves for ``df`` keyed on "output", then show the plot.

     NOTE(review): the frame is read from the name ``df`` in an enclosing
     scope, not from ``self`` -- confirm that is intended.
     """
     # ax=None is passed explicitly; no target axes are supplied.
     pdplt.andrews_curves(df, "output", ax=None)
     plt.show()
    @staticmethod
    def _project_onto_principal_axes(values, n_components=2):
        """Project the rows of ``values`` onto their leading principal axes.

        The columns are mean-centred, the covariance matrix of the
        centred data is eigen-decomposed, and the data is projected onto
        the ``n_components`` eigenvectors with the largest eigenvalues.

        :param values: 2-D array-like, one observation per row.
        :param n_components: number of principal axes to keep.
        :return: float ndarray with one row per observation and at most
            ``n_components`` columns, ordered by decreasing variance.
        """
        centered = np.asarray(values, dtype=float)
        centered = centered - centered.mean(axis=0)
        # np.cov yields a 0-d array for a single column; keep it 2-D so
        # the decomposition below always receives a matrix.
        cov_mat = np.atleast_2d(np.cov(centered, rowvar=False))
        # A covariance matrix is symmetric, so eigh applies and returns
        # real eigenpairs (eig may return complex ones, in no set order).
        eig_vals, eig_vects = np.linalg.eigh(cov_mat)
        leading = np.argsort(eig_vals)[::-1][:n_components]
        return np.dot(centered, eig_vects[:, leading])

    def draw_func(self):
        """Render the figure selected by ``self.figure_type``.

        Shows a message and returns without drawing when no sampling
        data or sampling info is loaded, when the regions are not
        linked, or when the data is flagged as not plottable.  Otherwise
        the current view is cleared, ``self.cur_slice()`` is called, the
        figure matching ``self.figure_type`` is drawn from
        ``self.cur_data`` and the canvas replaces the table view in
        ``self.figure_layout``.
        """
        if len(self.all_data.index) == 0:
            # "Please import the sampling data"
            self.show_message('请导入采样数据')
        elif len(self.info_data.index) == 0:
            # "Please import the sampling info"
            self.show_message('请导入采样信息')
        elif not self.region_linked:
            # "Please click link"
            self.show_message("请点击链接")
        elif not self.figure_able:
            # "Data contains non-numeric types, cannot plot"
            self.show_message("数据包含非数值类型,不可画图!")
        else:
            self.clear_func()
            self.cur_slice()
            # Pick a font with CJK glyphs so Chinese text renders.
            # Alternative: plt.rcParams['font.sans-serif'] = ['SimHei']
            plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
            # Render the minus sign correctly with that font.
            plt.rcParams['axes.unicode_minus'] = False
            plt.title(self.figure_type)
            if self.figure_type == "主成分分析":
                # PCA scatter: first column holds the region label, the
                # remaining columns are the features.
                region_data = self.cur_data.iloc[:, 0].values.tolist()
                # De-duplicate in first-seen order so the legend and the
                # colour assignment are the same on every run.
                regions = list(dict.fromkeys(region_data))
                projected = self._project_onto_principal_axes(
                    self.cur_data.iloc[:, 1:].values, n_components=2)
                x = projected[:, 0]
                y = projected[:, 1]
                # One scatter call per region so each gets its own colour
                # and legend entry.
                for region in regions:
                    rows = [i for i, label in enumerate(region_data)
                            if label == region]
                    plt.scatter(x[rows], y[rows])
                plt.legend(regions)
            elif self.figure_type == '平行坐标图':
                parallel_coordinates(self.cur_data, self.region_method)
            elif self.figure_type == "Andrews图":
                colors = ['b', 'g', 'r', 'orange']
                andrews_curves(self.cur_data, self.region_method, color=colors)
            elif self.figure_type == 'Radiv图':
                radviz(self.cur_data, self.region_method)
            elif self.figure_type == '矩阵散点图':
                print("绘制矩阵散点图")
                sns.pairplot(data=self.cur_data, hue=self.region_method)
                # pairplot draws on its own figure; wrap that figure in a
                # fresh canvas.
                f = plt.gcf()
                self.ax = f
                self.canvas = FigureCanvas(f)
            elif self.figure_type == 'Chernoff脸谱图':
                # Hand the data to an external script through files:
                # cur_data.xlsx is written here; face_info.csv and
                # face.png are read back below (presumably produced by
                # PyToR.py -- verify against that script).
                self.cur_data.to_excel('cur_data.xlsx')
                print("data out")
                os.system("python ./PyToR.py")
                face_info = pd.read_csv('face_info.csv')

                font = {'weight': 'normal',
                        'size': 11,
                        }

                plt.text(500, 0 , "脸谱图条目                 数据列", fontdict=font)
                for index, row in face_info.iterrows():
                    # .iloc keeps the access positional regardless of the
                    # CSV's column labels.
                    f_str = row.iloc[0] + " : "
                    plt.text(500, 20 + 20 * index, f_str, fontdict=font)
                    f_str = row.iloc[1]
                    plt.text(650, 30 + 20 * index, f_str, fontdict=font)
                plt.imshow(Image.open('face.png'))
                plt.gca().add_patch(plt.Rectangle(xy=(500, 20), width=100, height=300,
                                                  edgecolor=[1, 1, 1],
                                                  fill=False,
                                                  linewidth=2))

            # Swap the table view for the canvas and redraw.
            self.table_view.setVisible(False)
            self.canvas.setVisible(True)
            self.figure_layout.removeWidget(self.table_view)
            self.figure_layout.addWidget(self.canvas)
            self.canvas.draw()
            # NOTE(review): 2 presumably marks the "figure shown" state --
            # confirm against the other users of figure_state.
            self.figure_state = 2
                  annot_kws={'size': 8},
                  vmax=1,
                  vmin=-1,
                  cmap='GnBu',
                  center=0)
# Move the lower y-limit of ax2 up by 0.5 and the upper one down by 0.5.
# NOTE(review): presumably the usual workaround for heatmap rows being cut
# in half by some matplotlib releases -- confirm it is still needed.
bottom, top = ax2.get_ylim()
ax2.set_ylim(bottom + 0.5, top - 0.5)

#%%

####### PARALLEL COORDINATES

from pandas.plotting import parallel_coordinates

# NOTE(review): append mutates caracteristicas_media in place (assuming it is
# a list), so re-running this cell adds "diagnosis" a second time.
caracteristicas_media.append("diagnosis")
# Plot the selected columns with "diagnosis" as the class column.
parallel_coordinates(cancer_limpio_diagnostico[caracteristicas_media],
                     "diagnosis",
                     colormap='cool',
                     xticks=None)
plt.show()

#%%

####### ANDREWS CURVES

from pandas.plotting import andrews_curves
# Same column selection as above, drawn as Andrews curves per "diagnosis".
andrews_curves(cancer_limpio_diagnostico[caracteristicas_media],
               "diagnosis",
               colormap='rainbow')
plt.show()
Exemple #8
0
from pandas.plotting import andrews_curves
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the mtcars table and discard the 'cars' and 'carname' columns.
mtcars = pd.read_csv("../data/mtcars.csv").drop(['cars', 'carname'], axis=1)

# Andrews curves with 'cyl' as the class column, drawn on the current axes
# (the ones just created by plt.subplots).
_, ax = plt.subplots(dpi=100)
andrews_curves(mtcars, 'cyl', colormap='Set1')
sns.despine()

# Final cosmetics: x-range, title and a faint grid.
ax.set_xlim(-3, 3)
ax.set_title('Andrews Curves of mtcars')
ax.grid(alpha=0.3)
plt.show()
 def time_plot_andrews_curves(self):
     """Draw Andrews curves for ``self.df`` with "Name" as the second argument.

     NOTE(review): the ``time_`` prefix suggests a timing benchmark and
     ``andrews_curves`` is presumably ``pandas.plotting.andrews_curves``
     (where the second argument is the class column) -- confirm against
     the enclosing module.
     """
     andrews_curves(self.df, "Name")
Exemple #10
0
    def test_andrews_curves(self):
        """Check ``andrews_curves`` colour handling and its ``data`` keyword.

        The same battery of checks runs on ``self.iris`` and on a
        ten-row random frame with a single class: a default-colour smoke
        test, two explicit palettes, and the ``cm.jet`` colormap.  A
        three-row frame then verifies the legend handle colours, and the
        final call expects a ``FutureWarning`` when the frame is passed
        as ``data=``.
        """
        from pandas.plotting import andrews_curves
        from matplotlib import cm

        def exercise(frame):
            # Smoke test with default colours.
            _check_plot_works(andrews_curves, frame=frame,
                              class_column='Name')

            # Explicit palettes: hex codes first, then named colours.
            # Built here so every frame gets fresh palette objects.
            hex_codes = ('#556270', '#4ECDC4', '#C7F464')
            named = ['dodgerblue', 'aquamarine', 'seagreen']
            for palette in (hex_codes, named):
                axes = _check_plot_works(andrews_curves, frame=frame,
                                         class_column='Name', color=palette)
                self._check_colors(axes.get_lines()[:10],
                                   linecolors=palette,
                                   mapping=frame['Name'][:10])

            # Colormap: one evenly spaced sample per distinct class.
            axes = _check_plot_works(andrews_curves, frame=frame,
                                     class_column='Name', colormap=cm.jet)
            sampled = lmap(cm.jet,
                           np.linspace(0, 1, frame['Name'].nunique()))
            self._check_colors(axes.get_lines()[:10],
                               linecolors=sampled,
                               mapping=frame['Name'][:10])

        exercise(self.iris)

        n_rows = 10
        exercise(DataFrame({
            "A": random.rand(n_rows),
            "B": random.rand(n_rows),
            "C": random.rand(n_rows),
            "Name": ["A"] * n_rows,
        }))

        # One row per class, with the class labels doubling as colours.
        rgb = ['b', 'g', 'r']
        one_per_class = DataFrame({
            "A": [1, 2, 3],
            "B": [1, 2, 3],
            "C": [1, 2, 3],
            "Name": rgb,
        })
        axes = andrews_curves(one_per_class, 'Name', color=rgb)
        legend_handles, _ = axes.get_legend_handles_labels()
        self._check_colors(legend_handles, linecolors=rgb)

        with tm.assert_produces_warning(FutureWarning):
            andrews_curves(data=one_per_class, class_column='Name')
Exemple #11
0
# Sidebar selector for the top-level action; the first option is handled below.
todo_selectbox = st.sidebar.selectbox(
    "Что вы хотите сделать?", ("Ознакомиться с данными", "Кластеризовать"))

if todo_selectbox == "Ознакомиться с данными":
    # Second sidebar selector: which view of the data to show.
    visualize_selectbox = st.sidebar.selectbox(
        "Выберите метод визуализции",
        ("Общие сведения", "Andrew Curves for Gender",
         "Распределение по возрасту и доходу", "По gender",
         "Распределение по возрасту", "Распределение по доходу",
         "Распредленеие по оценке расходов", "Есть ли кореляция",
         "Пол и оценка расходов"))
    if visualize_selectbox == "Общие сведения":
        # One markdown line per column description, in display order.
        for description in (
                '_CustomerID_ - уникальный идентификатор пользователя',
                '_Gender_ - пол',
                '_Age_ - возраст',
                '_Annual Income_ - годовой доход клиента',
                '_Spending Score_ - оценка, присвоенная торговым центром на основе поведения клиентов и характера расходов',
        ):
            st.markdown(description)
    elif visualize_selectbox == "Andrew Curves for Gender":
        plt.rcParams['figure.figsize'] = (15, 10)
        # "CustomerID" is dropped; "Gender" is the class column.
        plotting.andrews_curves(data.drop("CustomerID", axis=1),
                                "Gender",
                                color=["lightsteelblue", "pink"])
        plt.title('Andrew Curves for Gender', fontsize=20)
        st.markdown(
            'Можно заметить, что линии соответствующие похожим **значениям** также имеют и схожую **форму**.'
        )
        # The plot is handed to Streamlit here; plt.show() is not called.
        st.pyplot(plt)
Exemple #12
0
import subprocess
import pandas as pd
import numpy as np
import serial
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from tkinter import *
#import plotly.plotly as py

# Read the training data into a pandas DataFrame.  Column names are supplied
# explicitly via names=, so the first line of the file is read as data.
df = pd.read_csv("/home/scas/Documents/test_file1.csv",
                 names=['humidity', 'temp', 'moisture', 'LDR', 'output'])

# Show the distinct class labels found in the "output" column.
print(df['output'].unique())
# Pairwise scatter matrix coloured by "output".
# NOTE(review): seaborn renamed pairplot's `size` argument to `height` --
# confirm against the installed version.
sns.pairplot(df, hue="output", size=3)
plt.show()
# Andrews curves with "output" as the class column; ax=None supplies no axes.
pdplt.andrews_curves(df, "output", ax=None)
plt.show()


# Encode the categorical target as integers; per the original author's note,
# the decision-tree classifier used later only takes integer inputs.
def encode_target(df, target_column):
    """Return a copy of ``df`` with an integer-coded ``"Target"`` column.

    Each distinct value of ``df[target_column]`` is numbered 0, 1, 2, ...
    in order of first appearance, and that number is stored in a new
    ``"Target"`` column.  The input frame is not modified.

    :param df: source DataFrame.
    :param target_column: name of the column holding the class labels.
    :return: ``(df_mod, targets)`` -- the augmented copy, and the array
        of distinct labels where ``targets[code]`` is the label encoded
        as ``code``.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    # map() is a plain per-value lookup and yields an integer column;
    # replace() relied on deprecated object-dtype downcasting to get there.
    df_mod["Target"] = df_mod[target_column].map(map_to_int)
    return df_mod, targets


# df2 is a copy of df with an added integer "Target" column; targets holds
# the distinct "output" labels in first-seen order.
df2, targets = encode_target(df, "output")
Exemple #13
0
# In[69]:

# Two-variable KDE of last_evaluation against average_montly_hours, shaded.
# NOTE(review): newer seaborn releases expect x=/y= keywords and `fill`
# instead of `shade` -- confirm against the installed version.
sns.kdeplot(df.last_evaluation, df.average_montly_hours, shade=True)

# ## Andrews Curve

# In[36]:

# Keep only the non-object columns and cast them to int.
# NOTE(review): astype(int) truncates any fractional values -- confirm
# that is intended before reading the curves.
df_andrews = df.select_dtypes(exclude=['object']).astype(int)
df_andrews.head()

# In[37]:

from pandas.plotting import andrews_curves
# Andrews curves on a 12x12 figure with 'left' as the class column.
plt.figure(figsize=(12, 12))
andrews_curves(df_andrews, 'left')

# ## Hexbin Plot

# In[71]:

# Hexagonal-bin plot of average_montly_hours against satisfaction_level with
# a colour bar for the bin values.
x, y = df.average_montly_hours, df.satisfaction_level
fig, ax = plt.subplots()
hx = ax.hexbin(x, y, cmap='Greens', gridsize=10)
fig.colorbar(hx)
plt.show()

# In[34]:

sns.jointplot(x='average_montly_hours',
              y='satisfaction_level',