def time_plot_andrews_curves(self):
    """Draw Andrews curves for ``self.df`` using "Name" as the class column.

    NOTE(review): the ``time_`` prefix suggests this is a timing benchmark
    body; confirm against the enclosing class.
    """
    frame = self.df
    andrews_curves(frame, "Name")
def test_andrews_curves(self):
    """Exercise ``andrews_curves`` colour handling.

    The same sequence of checks (default call, hex colours, named colours,
    colormap) is run against ``self.iris`` and against a random frame whose
    rows all share one class. Afterwards the legend handle colours are
    checked, and the call that passes the frame as ``data=`` is asserted to
    produce a ``FutureWarning``.
    """
    from pandas.plotting import andrews_curves
    from matplotlib import cm

    def check_color_options(frame):
        # Plain call first, then every supported way of choosing colours.
        _check_plot_works(andrews_curves, frame=frame, class_column='Name')

        hex_palette = ('#556270', '#4ECDC4', '#C7F464')
        named_palette = ['dodgerblue', 'aquamarine', 'seagreen']
        for palette in (hex_palette, named_palette):
            ax = _check_plot_works(andrews_curves, frame=frame,
                                   class_column='Name', color=palette)
            self._check_colors(ax.get_lines()[:10], linecolors=palette,
                               mapping=frame['Name'][:10])

        ax = _check_plot_works(andrews_curves, frame=frame,
                               class_column='Name', colormap=cm.jet)
        # One colour per distinct class, sampled evenly from the colormap.
        sample_points = np.linspace(0, 1, frame['Name'].nunique())
        expected = lmap(cm.jet, sample_points)
        self._check_colors(ax.get_lines()[:10], linecolors=expected,
                           mapping=frame['Name'][:10])

    check_color_options(self.iris)

    length = 10
    single_class = DataFrame({"A": random.rand(length),
                              "B": random.rand(length),
                              "C": random.rand(length),
                              "Name": ["A"] * length})
    check_color_options(single_class)

    # Legend handles must carry the colours that were passed in.
    colors = ['b', 'g', 'r']
    df = DataFrame({"A": [1, 2, 3],
                    "B": [1, 2, 3],
                    "C": [1, 2, 3],
                    "Name": colors})
    ax = andrews_curves(df, 'Name', color=colors)
    handles, _ = ax.get_legend_handles_labels()
    self._check_colors(handles, linecolors=colors)

    with tm.assert_produces_warning(FutureWarning):
        andrews_curves(data=df, class_column='Name')
def test_andrews_curves(self, iris):
    """Exercise ``andrews_curves`` colour handling on two frames.

    Runs the default call plus the hex-colour, named-colour and colormap
    variants against the ``iris`` argument and against a random frame with
    a single class value. Then checks the legend handle colours, and asserts
    that passing the frame as ``data=`` produces a ``FutureWarning``.
    """
    from pandas.plotting import andrews_curves
    from matplotlib import cm

    def verify_colors(frame):
        # Smoke-test the default call before the colour-specific ones.
        _check_plot_works(andrews_curves, frame=frame, class_column="Name")

        palettes = (
            ("#556270", "#4ECDC4", "#C7F464"),
            ["dodgerblue", "aquamarine", "seagreen"],
        )
        for palette in palettes:
            ax = _check_plot_works(
                andrews_curves, frame=frame, class_column="Name", color=palette
            )
            self._check_colors(
                ax.get_lines()[:10], linecolors=palette, mapping=frame["Name"][:10]
            )

        ax = _check_plot_works(
            andrews_curves, frame=frame, class_column="Name", colormap=cm.jet
        )
        # One colormap sample per distinct class value.
        class_count = frame["Name"].nunique()
        expected = [cm.jet(point) for point in np.linspace(0, 1, class_count)]
        self._check_colors(
            ax.get_lines()[:10], linecolors=expected, mapping=frame["Name"][:10]
        )

    verify_colors(iris)

    length = 10
    # Built in A, B, C order so the random draws happen in the same sequence.
    columns = {label: random.rand(length) for label in ("A", "B", "C")}
    columns["Name"] = ["A"] * length
    verify_colors(DataFrame(columns))

    colors = ["b", "g", "r"]
    df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors})
    ax = andrews_curves(df, "Name", color=colors)
    handles, _ = ax.get_legend_handles_labels()
    self._check_colors(handles, linecolors=colors)

    with tm.assert_produces_warning(FutureWarning):
        andrews_curves(data=df, class_column="Name")
def _label_and_show(title, legend_font_size):
    """Apply the shared title, axis labels and legend to the current plot, then show it."""
    plt.title(title, fontsize=20, fontweight="bold")
    plt.xlabel("Features", fontsize=15)
    plt.ylabel("Features Values", fontsize=15)
    plt.legend(
        loc=1,
        prop={"size": legend_font_size},
        frameon=True,
        shadow=True,
        facecolor="White",
        edgecolor="black",
    )
    plt.show()


# Every plot below uses the dataset without its "Id" column.
features = dataset.drop("Id", axis=1)

# Parallel coordinates
plt.figure(figsize=(15, 10))
parallel_coordinates(features, "Species")
_label_and_show("Parallel Coordinates Plot", 15)

# Andrews curves
from pandas.plotting import andrews_curves

andrews_curves(features, "Species")
_label_and_show("Andrews Curve Plot", 5)

# Box plots grouped by species
plt.figure()
features.boxplot(by="Species", figsize=(15, 10))
plt.show()
def display_andrews_graph(self):
    """Plot Andrews curves classed by the "output" column and show the figure.

    NOTE(review): the frame plotted is the module-level ``df``, not an
    attribute of ``self``; confirm that is intended.
    """
    class_column = "output"
    pdplt.andrews_curves(df, class_column, ax=None)
    plt.show()
def draw_func(self):
    """Draw the figure selected by ``self.figure_type`` and show it on the canvas.

    A message is shown and nothing is drawn when no sample data or sample
    info is loaded, when the regions are not linked, or when
    ``self.figure_able`` is false. Otherwise the previous figure is cleared,
    the current slice is refreshed, and one of the supported plots is drawn
    from ``self.cur_data``. Finally the table view is swapped for the canvas
    and ``self.figure_state`` is set to 2.

    NOTE(review): the source was whitespace-collapsed; the nesting of the
    statements after the Chernoff loop and of the final canvas swap was
    reconstructed from context and should be confirmed.
    """
    if len(self.all_data.index) == 0:
        self.show_message('请导入采样数据')
    elif len(self.info_data.index) == 0:
        self.show_message('请导入采样信息')
    elif not self.region_linked:
        self.show_message("请点击链接")
    elif not self.figure_able:
        self.show_message("数据包含非数值类型,不可画图!")
    else:
        self.clear_func()
        self.cur_slice()
        # Use a font that can render Chinese characters.
        plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
        # Render the minus sign correctly with that font.
        plt.rcParams['axes.unicode_minus'] = False
        plt.title(self.figure_type)
        if self.figure_type == "主成分分析":
            # First column holds the region label, the rest are features.
            region_data = self.cur_data.iloc[:, 0].values.tolist()
            print(region_data)
            # De-duplicate while keeping first-seen order so the legend and
            # colour assignment are the same on every run.
            regions = list(dict.fromkeys(region_data))
            print(regions)
            data = self.cur_data.iloc[:, 1:].values
            data = data - np.mean(data, axis=0)
            print("data", data.shape)
            cov_mat = np.cov(data, rowvar=0)
            print("cov:", cov_mat.shape)
            # The covariance matrix is symmetric, so eigh applies and returns
            # real eigenvalues in ascending order.
            eig_vals, eig_vects = np.linalg.eigh(cov_mat)
            # Keep the two eigenvectors with the largest eigenvalues: these
            # are the actual principal axes.
            top = 2
            principal_order = np.argsort(eig_vals)[::-1][:top]
            low_data_mat = np.dot(data, eig_vects[:, principal_order])
            print("low:", low_data_mat.shape)
            x = low_data_mat[:, 0]
            y = low_data_mat[:, 1]
            for region in regions:
                index = [i for i, label in enumerate(region_data) if label == region]
                plt.scatter(x[index], y[index])
            plt.legend(regions)
        elif self.figure_type == '平行坐标图':
            parallel_coordinates(self.cur_data, self.region_method)
        elif self.figure_type == "Andrews图":
            colors = ['b', 'g', 'r', 'orange']
            andrews_curves(self.cur_data, self.region_method, color=colors)
        elif self.figure_type == 'Radiv图':
            radviz(self.cur_data, self.region_method)
        elif self.figure_type == '矩阵散点图':
            print("绘制矩阵散点图")
            sns.pairplot(data=self.cur_data, hue=self.region_method)
            # pairplot draws on its own figure, so the canvas is rebuilt
            # around the current figure.
            f = plt.gcf()
            self.ax = f
            self.canvas = FigureCanvas(f)
        elif self.figure_type == 'Chernoff脸谱图':
            # The external script reads this spreadsheet; its exit status is
            # not checked here.
            self.cur_data.to_excel('cur_data.xlsx')
            print("data out")
            os.system("python ./PyToR.py")
            face_info = pd.read_csv('face_info.csv')
            font = {'weight': 'normal', 'size': 11, }
            plt.text(500, 0, "脸谱图条目 数据列", fontdict=font)
            # One text row per entry of face_info, in two columns.
            for index, row in face_info.iterrows():
                f_str = row[0] + " : "
                plt.text(500, 20 + 20 * index, f_str, fontdict=font)
                f_str = row[1]
                plt.text(650, 30 + 20 * index, f_str, fontdict=font)
            plt.imshow(Image.open('face.png'))
            plt.gca().add_patch(plt.Rectangle(xy=(500, 20), width=100, height=300,
                                              edgecolor=[1, 1, 1], fill=False,
                                              linewidth=2))
        # Swap the table view for the figure canvas and redraw.
        self.table_view.setVisible(False)
        self.canvas.setVisible(True)
        self.figure_layout.removeWidget(self.table_view)
        self.figure_layout.addWidget(self.canvas)
        self.canvas.draw()
        self.figure_state = 2
annot_kws={'size': 8}, vmax=1, vmin=-1, cmap='GnBu', center=0) bottom, top = ax2.get_ylim() ax2.set_ylim(bottom + 0.5, top - 0.5) #%% ####### COORDENADAS PARALELAS from pandas.plotting import parallel_coordinates caracteristicas_media.append("diagnosis") parallel_coordinates(cancer_limpio_diagnostico[caracteristicas_media], "diagnosis", colormap='cool', xticks=None) plt.show() #%% ####### CURVAS DE ANDREWS from pandas.plotting import andrews_curves andrews_curves(cancer_limpio_diagnostico[caracteristicas_media], "diagnosis", colormap='rainbow') plt.show()
from pandas.plotting import andrews_curves
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the data and drop the two columns named 'cars' and 'carname'.
raw = pd.read_csv("../data/mtcars.csv")
mtcars = raw.drop(['cars', 'carname'], axis=1)

# andrews_curves is called without ``ax``; it draws on the figure created here.
fig, ax = plt.subplots(dpi=100)
andrews_curves(mtcars, 'cyl', colormap='Set1')

sns.despine()
ax.set_xlim(-3, 3)
ax.set_title('Andrews Curves of mtcars')
ax.grid(alpha=0.3)
plt.show()
def test_andrews_curves(self):
    """Check how ``andrews_curves`` applies colours, colormaps and legend colours.

    The colour checks are run once on ``self.iris`` and once on a random
    frame with a single class. The final statement asserts that passing the
    frame through the ``data=`` keyword produces a ``FutureWarning``.
    """
    from pandas.plotting import andrews_curves
    from matplotlib import cm

    def run_color_checks(frame):
        def plot(**extra):
            # All calls share the frame and class column; only colours vary.
            return _check_plot_works(andrews_curves, frame=frame,
                                     class_column='Name', **extra)

        plot()

        rgba = ('#556270', '#4ECDC4', '#C7F464')
        ax = plot(color=rgba)
        self._check_colors(ax.get_lines()[:10], linecolors=rgba,
                           mapping=frame['Name'][:10])

        cnames = ['dodgerblue', 'aquamarine', 'seagreen']
        ax = plot(color=cnames)
        self._check_colors(ax.get_lines()[:10], linecolors=cnames,
                           mapping=frame['Name'][:10])

        ax = plot(colormap=cm.jet)
        cmaps = lmap(cm.jet, np.linspace(0, 1, frame['Name'].nunique()))
        self._check_colors(ax.get_lines()[:10], linecolors=cmaps,
                           mapping=frame['Name'][:10])

    run_color_checks(self.iris)

    length = 10
    random_frame = DataFrame({
        "A": random.rand(length),
        "B": random.rand(length),
        "C": random.rand(length),
        "Name": ["A"] * length,
    })
    run_color_checks(random_frame)

    # Each row is its own class, so each legend handle has its own colour.
    colors = ['b', 'g', 'r']
    values = [1, 2, 3]
    df = DataFrame({"A": values, "B": values, "C": values, "Name": colors})
    ax = andrews_curves(df, 'Name', color=colors)
    legend_handles = ax.get_legend_handles_labels()[0]
    self._check_colors(legend_handles, linecolors=colors)

    with tm.assert_produces_warning(FutureWarning):
        andrews_curves(data=df, class_column='Name')
# Top-level action chosen in the sidebar.
todo_options = ("Ознакомиться с данными", "Кластеризовать")
todo_selectbox = st.sidebar.selectbox("Что вы хотите сделать?", todo_options)

if todo_selectbox == "Ознакомиться с данными":
    # Visualisation chosen in the sidebar; the labels double as branch keys.
    visualize_options = (
        "Общие сведения",
        "Andrew Curves for Gender",
        "Распределение по возрасту и доходу",
        "По gender",
        "Распределение по возрасту",
        "Распределение по доходу",
        "Распредленеие по оценке расходов",
        "Есть ли кореляция",
        "Пол и оценка расходов",
    )
    visualize_selectbox = st.sidebar.selectbox(
        "Выберите метод визуализции", visualize_options)

    if visualize_selectbox == "Andrew Curves for Gender":
        plt.rcParams['figure.figsize'] = (15, 10)
        gender_frame = data.drop("CustomerID", axis=1)
        gender_colors = ["lightsteelblue", "pink"]
        plotting.andrews_curves(gender_frame, "Gender", color=gender_colors)
        plt.title('Andrew Curves for Gender', fontsize=20)
        st.markdown(
            'Можно заметить, что линии соответствующие похожим **значениям** также имеют и схожую **форму**.'
        )
        st.pyplot(plt)
    elif visualize_selectbox == "Общие сведения":
        # One markdown line per dataset column description.
        column_notes = (
            '_CustomerID_ - уникальный идентификатор пользователя',
            '_Gender_ - пол',
            '_Age_ - возраст',
            '_Annual Income_ - годовой доход клиента',
            '_Spending Score_ - оценка, присвоенная торговым центром на основе поведения клиентов и характера расходов',
        )
        for note in column_notes:
            st.markdown(note)
import subprocess
import pandas as pd
import numpy as np
import serial
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from tkinter import *
#import plotly.plotly as py

# Read the training data into a pandas DataFrame.
df = pd.read_csv("/home/scas/Documents/test_file1.csv",
                 names=['humidity', 'temp', 'moisture', 'LDR', 'output'])
print(df['output'].unique())

sns.pairplot(df, hue="output", size=3)
plt.show()

pdplt.andrews_curves(df, "output", ax=None)
plt.show()


# The target labels are encoded as integers before being used for training.
def encode_target(df, target_column):
    """Return a copy of ``df`` with an integer-encoded "Target" column.

    Parameters
    ----------
    df : DataFrame
        Input frame; it is copied, not modified.
    target_column : str
        Name of the column holding the class labels.

    Returns
    -------
    (DataFrame, array)
        The copy with the added "Target" column, and the unique labels in
        order of first appearance. A label's position in that array is its
        integer code.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    # Series.map does the one-to-one lookup directly. Series.replace with a
    # dict relies on silent downcasting, which recent pandas deprecates and
    # which can leave the encoded column with object dtype.
    df_mod["Target"] = df_mod[target_column].map(map_to_int)
    return df_mod, targets


df2, targets = encode_target(df, "output")
# In[69]: sns.kdeplot(df.last_evaluation, df.average_montly_hours, shade=True) # ## Andrews Curve # In[36]: df_andrews = df.select_dtypes(exclude=['object']).astype(int) df_andrews.head() # In[37]: from pandas.plotting import andrews_curves plt.figure(figsize=(12, 12)) andrews_curves(df_andrews, 'left') # ## Hexbin Plot # In[71]: x, y = df.average_montly_hours, df.satisfaction_level fig, ax = plt.subplots() hx = ax.hexbin(x, y, cmap='Greens', gridsize=10) fig.colorbar(hx) plt.show() # In[34]: sns.jointplot(x='average_montly_hours', y='satisfaction_level',