def plot_parallel(polt_data, m, name):
    """Show a parallel-coordinates plot of ``polt_data`` drawn as one class.

    The data is loaded into a DataFrame whose columns are labelled ``1..m``.
    A constant column ``'0'`` (all ones) is appended and used as the class
    column, so every row is drawn as a member of the same class.

    :param polt_data: row-oriented data accepted by ``pd.DataFrame``.
    :param m: number of columns; the axes are labelled ``1`` to ``m``.
    :param name: text placed in front of the point count in the title.
    """
    axis_labels = list(range(1, m + 1))
    frame = pd.DataFrame(data=polt_data, columns=axis_labels)
    frame['0'] = pd.Series(1, index=frame.index)

    plt.figure(figsize=(12, 9), dpi=200)
    plt.title('%s point number: %d' % (name, len(polt_data)))
    parallel_coordinates(frame, class_column='0')
    plt.show()
def parallelPlot(df, yearI, yearF, target='Años', xlabel='Topics',
                 ylabel='Valoración promedio', title='Valoración por año'):
    """Show a parallel-coordinates plot of the average 'overall' value per year.

    ``df`` is first passed through ``expandReviewTime``. The result is pivoted
    so that rows are the 'año' values and columns are the 'maximos' values,
    each cell holding the average of 'overall'. The row labels are copied into
    a column named ``target``, which serves as the class column of the plot.

    Rows labelled ``" " + str(year)`` (note the leading space) for every year
    in ``range(yearI, yearF)`` are dropped before plotting.

    :param df: input frame handed to ``expandReviewTime``.
    :param yearI: first year to drop (inclusive).
    :param yearF: end of the dropped range (exclusive).
    :param target: name of the class column added to the pivoted frame.
    :param xlabel: x axis label.
    :param ylabel: y axis label.
    :param title: figure title.
    """
    expanded = expandReviewTime(df)
    by_topic = pd.pivot_table(expanded, values='overall', index=['maximos'],
                              columns=['año'], aggfunc=np.average)
    by_year = by_topic.transpose()
    by_year[target] = by_year.index

    dropped_labels = [" " + str(year) for year in range(yearI, yearF)]
    by_year = by_year.drop(dropped_labels)

    plt.figure()
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.title(title)
    parallel_coordinates(by_year, target, colormap='gist_rainbow')
    plt.show()
def cluster(self):
    """Save one sign-pattern parallel-coordinates figure per row group.

    Reads "correct_file_characterristic1" through ``self.read_csv``. For every
    hard-coded group of row positions, each selected row is cut into 24
    consecutive blocks of 10 values; a block contributes ``1`` when its sum is
    positive and ``-1`` otherwise. The resulting 24-column frame is plotted in
    red with column ``0`` as the class column and written to
    ``myresult/<group number>.svg`` (numbering starts at 0).
    """
    features = self.read_csv("correct_file_characterristic1")
    row_groups = [[127], [558], [662], [691], [793], [802], [834], [876], [1019]]

    for group_no, row_ids in enumerate(row_groups):
        signs = {block: [] for block in range(24)}
        for row_id in row_ids:
            values = np.array(features.iloc[[row_id]]).tolist()[0]
            for block in range(24):
                first = block * 10
                # Explicit indexing (not slicing) so a too-short row still raises.
                total = sum(values[first + offset] for offset in range(10))
                signs[block].append(1 if total > 0 else -1)

        sign_frame = pd.DataFrame(signs)
        parallel_coordinates(sign_frame, 0, color="red")
        plt.savefig("myresult/" + str(group_no) + ".svg")
        plt.close()
def test_parallel_coordinates(self, iris):
    """Check line colours, ``axvlines`` and deprecated keywords of the plot.

    Covers: explicit hex colours, named colours, a colormap, disabling the
    vertical axis lines, legend handle colours, and the FutureWarning raised
    for the ``data=`` and ``colors=`` keywords.
    """
    from pandas.plotting import parallel_coordinates
    from matplotlib import cm

    def draw(**extra):
        # Every iris plot in this test shares the same frame and class column.
        return _check_plot_works(parallel_coordinates, frame=iris,
                                 class_column="Name", **extra)

    ax = draw()
    nlines = len(ax.get_lines())
    nxticks = len(ax.xaxis.get_ticklabels())

    rgba = ("#556270", "#4ECDC4", "#C7F464")
    ax = draw(color=rgba)
    self._check_colors(ax.get_lines()[:10], linecolors=rgba,
                       mapping=iris["Name"][:10])

    cnames = ["dodgerblue", "aquamarine", "seagreen"]
    ax = draw(color=cnames)
    self._check_colors(ax.get_lines()[:10], linecolors=cnames,
                       mapping=iris["Name"][:10])

    ax = draw(colormap=cm.jet)
    cmaps = list(map(cm.jet, np.linspace(0, 1, iris["Name"].nunique())))
    self._check_colors(ax.get_lines()[:10], linecolors=cmaps,
                       mapping=iris["Name"][:10])

    # Without the vertical axis lines there is one line fewer per x tick.
    ax = draw(axvlines=False)
    assert len(ax.get_lines()) == (nlines - nxticks)

    colors = ["b", "g", "r"]
    small = DataFrame({
        "A": [1, 2, 3],
        "B": [1, 2, 3],
        "C": [1, 2, 3],
        "Name": colors,
    })
    ax = parallel_coordinates(small, "Name", color=colors)
    handles, labels = ax.get_legend_handles_labels()
    self._check_colors(handles, linecolors=colors)

    with tm.assert_produces_warning(FutureWarning):
        parallel_coordinates(data=small, class_column="Name")
    with tm.assert_produces_warning(FutureWarning):
        parallel_coordinates(small, "Name", colors=colors)
def test_parallel_coordinates(self, iris):
    """Exercise colour handling, ``axvlines`` and deprecated keyword warnings.

    The iris frame is plotted with explicit colours (hex and named), with a
    colormap and without vertical axis lines; a small three-row frame is then
    used to check legend handle colours and the FutureWarning emitted for the
    ``data=`` and ``colors=`` keywords.
    """
    from pandas.plotting import parallel_coordinates
    from matplotlib import cm

    df = iris
    common = dict(frame=df, class_column='Name')

    ax = _check_plot_works(parallel_coordinates, **common)
    nlines = len(ax.get_lines())
    nxticks = len(ax.xaxis.get_ticklabels())

    rgba = ('#556270', '#4ECDC4', '#C7F464')
    cnames = ['dodgerblue', 'aquamarine', 'seagreen']
    for palette in (rgba, cnames):
        ax = _check_plot_works(parallel_coordinates, color=palette, **common)
        self._check_colors(ax.get_lines()[:10], linecolors=palette,
                           mapping=df['Name'][:10])

    ax = _check_plot_works(parallel_coordinates, colormap=cm.jet, **common)
    cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
    self._check_colors(ax.get_lines()[:10], linecolors=cmaps,
                       mapping=df['Name'][:10])

    # Dropping the vertical axis lines removes one line per x tick.
    ax = _check_plot_works(parallel_coordinates, axvlines=False, **common)
    assert len(ax.get_lines()) == (nlines - nxticks)

    colors = ['b', 'g', 'r']
    df = DataFrame({
        "A": [1, 2, 3],
        "B": [1, 2, 3],
        "C": [1, 2, 3],
        "Name": colors,
    })
    ax = parallel_coordinates(df, 'Name', color=colors)
    handles, labels = ax.get_legend_handles_labels()
    self._check_colors(handles, linecolors=colors)

    with tm.assert_produces_warning(FutureWarning):
        parallel_coordinates(data=df, class_column='Name')
    with tm.assert_produces_warning(FutureWarning):
        parallel_coordinates(df, 'Name', colors=colors)
def parallel_plot(data):
    """Draw a parallel-coordinates plot of ``data`` grouped by 'prediction'.

    A new 15x8 figure is created with the y axis fixed to [-3, 3]. The colour
    list cycles through b, r, g, y, k and has one entry per row of ``data``;
    lines are drawn with circle markers. The figure is not shown here.
    """
    palette = cycle(['b', 'r', 'g', 'y', 'k'])
    my_colors = list(islice(palette, len(data)))

    figure = plt.figure(figsize=(15, 8))
    figure.gca().axes.set_ylim([-3, +3])
    parallel_coordinates(data, 'prediction', color=my_colors, marker='o')
def parallel_plot(data, rg):
    """
    Plot the parallel plots for k-means class representatives.

    A new 18x8 figure is created whose y limits are set to ``rg``. Lines are
    grouped by the "prediction" column, drawn with circle markers and coloured
    from a list that cycles through b, r, g, y, k with one entry per row.
    """
    palette = cycle(["b", "r", "g", "y", "k"])
    my_colors = list(islice(palette, len(data)))

    figure = plt.figure(figsize=(18, 8))
    figure.gca().axes.set_ylim(rg)
    parallel_coordinates(data, "prediction", color=my_colors, marker="o")
def display_parallel_coordinates(df, num_clusters):
    '''Display a parallel coordinates plot for the clusters in df.

    One subplot row is drawn per cluster. In row ``i`` every other cluster is
    drawn first with alpha 0.2 and cluster ``i`` is drawn last with alpha 0.5,
    so its lines sit on top. Returns the figure after showing it.
    '''
    # Rows of df belonging to each cluster label 0..num_clusters-1
    cluster_points = [df[df.cluster == label] for label in range(num_clusters)]

    fig = plt.figure(figsize=(12, 15))
    fig.suptitle("Parallel Coordinates Plot for the Clusters", fontsize=18)
    fig.subplots_adjust(top=0.95, wspace=0)

    for focus in range(num_clusters):
        plt.subplot(num_clusters, 1, focus + 1)

        # Background: all clusters except the one this row highlights
        for other, points in enumerate(cluster_points):
            if other == focus:
                continue
            parallel_coordinates(points, 'cluster',
                                 color=[addAlpha(palette[other], 0.2)])

        # Foreground: the highlighted cluster, drawn last
        parallel_coordinates(cluster_points[focus], 'cluster',
                             color=[addAlpha(palette[focus], 0.5)])

        # Push every second x tick label down so neighbours do not collide
        for tick in plt.gca().xaxis.get_major_ticks()[1::2]:
            tick.set_pad(20)

    plt.show()
    return fig
def plot_parallel_coordinates(
        df,
        col_name,
        sample_size=0.1,
        scale_cols=True,
        figsize=(20, 10),
):
    """
    Take a sample from df, convert to Pandas and do on it a parallel coordinates plot
    grouped by col_name. Intended to see how the features correlate with the label
    and/or whether some features can separate the label values.

    :param df: spark dataframe.
    :param col_name: (string) column passed to ``parallel_coordinates`` as the class
        column; it is added to the selected columns when it is not numeric and is
        never rescaled.
    :param sample_size: (float) proportion of the data to sample for the plot
        (sampling is done without replacement).
    :param scale_cols: (bool) Whether to scale the features to the [0., 1.] interval.
        Columns with a single distinct value are shifted to 0 rather than divided by
        a zero range, which would turn them into NaN.
    :param figsize: (tuple) size of the created figure.
    :return: None
    """
    # Copy so the list returned by the helper is never modified in place.
    selected = list(list_numeric_features(df))
    if col_name not in selected:
        selected.append(col_name)
    sampled_df = df.select(selected).sample(False, sample_size).toPandas()

    if scale_cols:
        for name in sampled_df.columns:
            if name == col_name:
                continue
            shifted = sampled_df[name] - sampled_df[name].min()
            span = shifted.max()
            # span is 0 for a constant column and NaN for an empty sample;
            # dividing in either case would only produce NaN/inf.
            sampled_df[name] = shifted / span if span > 0 else shifted

    plt.figure(figsize=figsize)
    parallel_coordinates(sampled_df, col_name)
    plt.xticks(rotation=90)
def demonstrate(wines):
    """Show a quality count plot and a parallel-coordinates plot for ``wines``.

    First figure: counts per "quality" value, split by "wine_type".
    Second figure: four standardised attributes drawn as parallel coordinates
    with "wine_type" as the class column.

    :param wines: DataFrame with the columns "quality", "wine_type",
        'density', 'residual sugar', 'total sulfur dioxide' and
        'fixed acidity'.
    """
    sns.countplot(x="quality", hue="wine_type", data=wines,
                  palette={"red": "#FF9999", "white": "#FFE888"})
    plt.show()

    # demonstrate some attributes for both red and white wine
    cols = [
        'density', 'residual sugar', 'total sulfur dioxide', 'fixed acidity'
    ]
    scaled_values = StandardScaler().fit_transform(wines[cols])
    # Keep the original index: concat aligns on index labels, so a fresh
    # RangeIndex would pair scaled rows with the wrong wine_type (or NaN)
    # whenever ``wines`` is not indexed 0..n-1.
    scaled_df = pd.DataFrame(scaled_values, columns=cols, index=wines.index)
    final_df = pd.concat([scaled_df, wines['wine_type']], axis=1)

    # plot parallel coordinates
    plt.figure()
    parallel_coordinates(final_df, 'wine_type', color=('#FFE888', '#FF9999'))
    plt.show()
def make_parallel_coordinates(self):
    """Save a parallel-coordinates plot of the transformed data with its target.

    The transformed data is placed next to ``y_train`` (index reset so both
    line up positionally) and plotted with ``self.transform.load.outputs``
    unpacked as the remaining positional arguments. The figure is written to
    ``<self.dir_>/Visual/transform_parallel_coordinates.png`` and closed.
    """
    fig, _ = plt.subplots()

    features = pd.DataFrame(self.transform.data)
    loader = self.transform.load
    target = loader.partition.y_train.reset_index(drop=True)
    combined = pd.concat([features, target], axis=1, sort=False)

    parallel_coordinates(combined, *loader.outputs)
    plt.savefig(f"{self.dir_}/Visual/transform_parallel_coordinates.png",
                bbox_inches='tight')
    plt.close(fig)
def plot_parallel_coordinates(data):
    """Show a parallel-coordinates plot of ``data`` grouped by 'Class'.

    Features and labels are obtained with ``extract_features`` and
    ``extract_labels`` and placed side by side before plotting.
    """
    # format data
    columns = [extract_features(data), extract_labels(data)]
    data_to_plot = pd.concat(columns, axis=1)

    # plot
    parallel_coordinates(data_to_plot, 'Class')
    plt.show()
def parallel_plot(df, label):
    """Draw a parallel-coordinates plot of ``df`` with ``label`` as class column.

    A new 10x6 figure titled 'Clusters of iris' is created, tick labels are
    rotated by 20 degrees and lines are drawn with circle markers using the
    colours r, g, b. The figure is not shown here.
    """
    plt.figure(figsize=(10, 6))
    plt.title('Clusters of iris')
    plt.xlabel('Flower Features')
    plt.ylabel('Standard Deviations')
    plt.tick_params(labelrotation=20)

    parallel_coordinates(df, label, color=['r', 'g', 'b'], marker='o')
def plot_parallel(df):
    """Show a parallel-coordinates plot of ``df`` grouped by 'dataset'.

    The first index label of ``df`` is used both in the title and as the y
    axis label. The legend is placed with ``loc=4``.
    """
    plt.figure(figsize=(12, 6))
    first_label = df.index[0]
    plt.title(first_label + ' score of different algorithms', fontsize=15)

    parallel_coordinates(df, 'dataset')
    plt.grid(lw=0.1)
    plt.legend(loc=4)
    plt.ylabel(first_label, fontsize=14)
    plt.show()
def plot_parallel_coordinates(plotdata, label):
    """Show a parallel-coordinates plot of ``plotdata`` grouped by ``label``.

    ``label`` is stored in a 'label' column of ``plotdata`` itself, so the
    caller's object is modified. The frame is printed before plotting and the
    length of ``plotdata[0]`` is printed after the plot window is closed.
    """
    plotdata['label'] = label
    print(plotdata)

    plt.figure(figsize=(100, 50), dpi=20)
    parallel_coordinates(plotdata, 'label')
    plt.show()

    print(len(plotdata[0]))
def parallel_plot(data):
    """Draw a parallel-coordinates plot of ``data`` grouped by 'prediction'.

    A new 15x8 figure is created with the y axis fixed to [-2.5, 2.5]. The
    colour list cycles through b, r, g, y, k with one entry per row of
    ``data``; lines use circle markers. The figure is not shown here.
    """
    from itertools import cycle, islice
    from pandas.plotting import parallel_coordinates
    import matplotlib.pyplot as plt

    palette = cycle(['b', 'r', 'g', 'y', 'k'])
    my_colors = list(islice(palette, len(data)))

    figure = plt.figure(figsize=(15, 8))
    figure.gca().axes.set_ylim([-2.5, +2.5])
    parallel_coordinates(data, 'prediction', color=my_colors, marker='o')
def parallel_coordinates_data_visualization(dataset_df):
    """Show a styled parallel-coordinates plot of ``dataset_df`` by "class".

    Creates a 10x8 figure, adds a bold title, axis labels and a framed,
    shadowed legend at ``loc=1``, then shows the figure.
    """
    legend_style = dict(loc=1, prop={'size': 15}, frameon=True, shadow=True,
                        facecolor="white", edgecolor="black")

    plt.figure(figsize=(10, 8))
    parallel_coordinates(dataset_df, "class")
    plt.title('Parallel Coordinates Plot', fontsize=15, fontweight='bold')
    plt.xlabel('Features', fontsize=15)
    plt.ylabel('Features values', fontsize=15)
    plt.legend(**legend_style)
    plt.show()
def make_parallel_coordinates(self):
    """Save a parallel-coordinates plot of the transformed data.

    The transformed data is labelled with ``self.transform.load.inputs`` and
    plotted with ``self.load.outputs`` unpacked as the remaining positional
    arguments. The file name embeds the string form of ``self.transform``.
    """
    fig, _ = plt.subplots()

    # NOTE(review): the frame only carries the input columns while the class
    # column comes from self.load.outputs -- confirm the two overlap.
    frame = pd.DataFrame(self.transform.data,
                         columns=self.transform.load.inputs)
    parallel_coordinates(frame, *self.load.outputs)

    target_path = f"Data/Visual/{self.transform}_parallel_coordinates.png"
    plt.savefig(target_path, bbox_inches='tight')
    plt.close(fig)
def parallel_plot(data, title):
    """Draw a titled parallel-coordinates plot of ``data`` by 'prediction'.

    A new 10x6 figure is created with y limits [-3, 3] and tick labels
    rotated by 20 degrees. The colour list cycles through r, g, b, y, k with
    one entry per row of ``data``; lines use circle markers. The figure is
    not shown here.
    """
    plt.figure(figsize=(10, 6))
    plt.ylim([-3, 3])
    plt.title(title)
    plt.xlabel('Weather Features')
    plt.ylabel('Standard Deviation')
    plt.tick_params(labelrotation=20)

    palette = cycle(['r', 'g', 'b', 'y', 'k'])
    my_colors = list(islice(palette, len(data)))
    parallel_coordinates(data, 'prediction', color=my_colors, marker='o')
def EDA_way_1():
    """Print summaries of the module-level ``df`` and draw exploratory plots.

    Printed: info, describe, head, tail, dtypes and per-column counts.
    Figures, each on its own figure so that no plot overwrites another:
      1. 2x2 histograms of the four measurement columns;
      2. 2x2 box plots of the same columns per species;
      3. violin plot of petal length per species;
      4. pair plot coloured by species;
      5. heat map of the correlations between the numeric columns;
      6. parallel-coordinates plot grouped by "Species".

    Nothing is shown or saved here; the caller decides when to display.
    """
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be passed to print() (that appended a stray "None").
    print("Info")
    df.info()
    print("\n\n\nData Describe\n", df.describe())
    print("\n\n\nFirst 5 Row of Data\n", df.head())
    print("\n\n\nLast 5 Row of Data\n", df.tail())
    print("\n\n\nData Types\n", df.dtypes)
    print("\n\n\nCounting the number of rows\n", df.count())

    n_bins = 10
    fn = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
    cn = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    titles = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']

    # axs.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1]
    fig, axs = plt.subplots(2, 2)
    for ax, column, title in zip(axs.flat, fn, titles):
        ax.hist(df[column], bins=n_bins)
        ax.set_title(title)
    fig.tight_layout(pad=1.0)

    fig, axs = plt.subplots(2, 2)
    for ax, column in zip(axs.flat, fn):
        sns.boxplot(x='Species', y=column, data=df, order=cn, ax=ax)
    # add some spacing between subplots
    fig.tight_layout(pad=1.0)

    # New figure: otherwise the violins are painted over the last box plot.
    plt.figure()
    # NOTE(review): `size` is kept from the original call; confirm the
    # installed seaborn version still accepts it for violinplot.
    sns.violinplot(x="Species", y="PetalLengthCm", data=df, size=5,
                   order=cn, palette='colorblind')

    sns.pairplot(df, hue="Species", height=2, palette='colorblind')

    # New figure: otherwise the heat map replaces a cell of the pair plot.
    plt.figure()
    # Restrict to numeric columns; corr() on the string "Species" column
    # raises on recent pandas versions.
    corrmat = df.select_dtypes(include="number").corr()
    sns.heatmap(corrmat, annot=True, square=True)

    plt.figure()
    parallel_coordinates(df, "Species", color=['blue', 'red', 'green'])
def parallel_coordinate(df, label):
    """
    Show a parallel-coordinates plot of ``df``.

    :param df: Data frame
    :param label: Label, used as the class column of the plot
    :return: None
    """
    from pandas.plotting import parallel_coordinates as draw_parallel

    draw_parallel(df, label)
    plt.show()
def snscord(self):
    """Show a parallel-coordinates plot of ``self.mainFrame``.

    ``self.dataFrameSet()`` is called first; the last entry of
    ``self.Header`` is used as the class column and the x tick labels are
    rotated by 90 degrees.
    """
    self.dataFrameSet()

    frame = self.mainFrame
    class_column = self.Header[-1]
    parallel_coordinates(frame, class_column)

    py.xticks(rotation=90)
    py.show()
def plot_bicluster(matrix, bicluster, name="Bicluster"):
    """Show the rows of a bicluster as a parallel-coordinates plot.

    The sub-matrix returned by ``get_bicluster`` is plotted with every row as
    its own class (the row index is copied into an "index" column). The title
    carries ``name`` and ``bicluster.msr``; the legend is removed.
    """
    frame = pd.DataFrame(get_bicluster(matrix, bicluster))
    frame["index"] = frame.index.values
    parallel_coordinates(frame, "index", linewidth=1.0)

    heading = name + "\nMean Squared Residue: " + str(bicluster.msr)
    plt.title(heading)
    plt.xlabel('Condition')
    plt.ylabel('Expression level')

    # One legend entry per row would be unreadable, so detach the legend.
    axes = plt.gca()
    axes.legend_ = None
    plt.show()
def draw_coord(coords):
    """Save and show a parallel-coordinates plot of ``coords`` by 'rule'.

    All open figures are closed first. The plot gets an empty legend, a grid
    and a title, is written to 'TEDS_basket_coordinate_plot.png' inside
    ``figures_path`` and is then shown.
    """
    # coordinates plot
    plt.close("all")
    plt.figure(figsize=(12, 10), dpi=80)

    parallel_coordinates(coords, 'rule')
    plt.legend([])
    plt.grid(True)
    plt.title('TEDS attributes parallel coordinates plot', fontsize=14)
    plt.tight_layout()

    output_file = os.path.join(figures_path, 'TEDS_basket_coordinate_plot.png')
    plt.savefig(output_file)
    plt.show()
def plot2d(antibody):
    """Save a parallel-coordinates plot and a 2-D scatter of selected features.

    The columns of the module-level ``df`` listed in ``antibody.features`` are
    combined with ``dfclass`` and ``dflabel`` and renamed to the feature
    numbers (as strings) followed by "class_id" and "class".

    Two PNG files are written to the working directory, both named after the
    joined column names:
      * ``parallel_<cols>.png`` -- parallel coordinates grouped by "class";
      * ``scatter_<cols>.png``  -- first column against second column, one
        colour and one legend entry per class.

    Both figures are closed after saving.

    :param antibody: object whose ``features`` attribute lists the column
        positions of ``df`` to plot.
    :raises KeyError: if a class value is not one of 'short_acting',
        'medium_acting' or 'Long_acting'.
    """
    # assumes dfclass and dflabel contribute exactly one column each -- the
    # column renaming below depends on it; TODO confirm
    dfplot = pd.concat([df.iloc[:, antibody.features], dfclass, dflabel],
                       axis=1, ignore_index=True)
    col = [str(x) for x in antibody.features] + ["class_id", "class"]
    dfplot.columns = col
    xlabel = 'feature ' + col[0]
    ylabel = 'feature ' + col[1]

    parallel_fig = plt.figure(figsize=(15, 10))
    parallel_coordinates(dfplot, "class")
    plt.title('Parallel Coordinates Plot', fontsize=20, fontweight='bold')
    plt.xlabel('Features', fontsize=15)
    plt.ylabel('Features values', fontsize=15)
    plt.legend(loc=1, prop={'size': 15}, frameon=True, shadow=True,
               facecolor="white", edgecolor="black")
    filename = 'parallel_' + '_'.join(dfplot.columns) + '.png'
    plt.savefig(filename)
    # Close it: this figure was previously left open on every call.
    plt.close(parallel_fig)

    clr = {
        'short_acting': "red",
        "medium_acting": "green",
        "Long_acting": "blue"
    }

    # Create plot
    fig = plt.figure()
    # `axisbg` was removed from Matplotlib; `facecolor` is its replacement.
    ax = fig.add_subplot(1, 1, 1, facecolor="1.0")
    # One scatter call per class gives a single legend entry per class
    # instead of one entry per data row.
    for cl, members in dfplot.groupby("class", sort=False):
        ax.scatter(members.iloc[:, 0], members.iloc[:, 1], alpha=0.8,
                   c=clr[cl], edgecolors='none', s=30, label=cl)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title('2d feature distribution')
    ax.legend()
    filename = 'scatter_' + '_'.join(dfplot.columns) + '.png'
    fig.savefig(filename)  # save the figure to file
    plt.close(fig)
def plot_parallel_coordinates(gs, df, dpi, name=""):
    """
    Draw two stacked parallel-coordinates plots for one cluster inside ``gs``.

    The subplot spec ``gs`` is split into two rows. The upper axes shows the
    raw metrics (throughput, volume, runtime, accesses, process count, file
    count) on a logarithmic y axis; the lower axes shows the ratio columns
    with percentage tick labels. Both use "apps_short" as the class column,
    plot a random sample of one tenth of the rows with alpha 0.1, and have
    their legends removed.

    :param gs: subplot spec to draw into.
    :param df: frame holding the columns listed in ``log_columns`` and
        ``perc_columns`` below.
    :param dpi: passed as the scale transform of the tick-label offset
        (presumably the figure's ``dpi_scale_trans`` -- confirm with caller).
    :param name: cluster name used in the title.
    """
    split_gs = matplotlib.gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gs, hspace=0.8)
    log_ax = plt.subplot(split_gs[0])  # noqa: F821
    perc_ax = plt.subplot(split_gs[1])  # noqa: F821
    # "apps_short" is included in both lists because it is the class column.
    log_columns = ["POSIX_RAW_agg_perf_by_slowest", "POSIX_RAW_total_bytes", "RAW_runtime", "POSIX_RAW_total_accesses", "RAW_nprocs", "POSIX_RAW_total_files", "apps_short"]
    perc_columns = ["POSIX_BYTES_READ_PERC", 'POSIX_read_only_bytes_perc', 'POSIX_read_write_bytes_perc', 'POSIX_write_only_bytes_perc', "POSIX_unique_files_perc", "apps_short"]
    # Preprocess the log table data (on a copy, so df itself is not modified)
    log_data = df[log_columns].copy()
    log_data.POSIX_RAW_total_bytes /= 1024 ** 3  # Convert to gigabytes
    # Divides by 1024, while the axis label below says "in 1000s"
    log_data.POSIX_RAW_total_accesses /= 1024  # Convert to kilo-transactions
    log_data.rename(columns={"POSIX_RAW_agg_perf_by_slowest": "Throughput\n[MB/s]", "POSIX_RAW_total_accesses": "R/W accesses\n(in 1000s)", "RAW_runtime": "runtime [s]", "RAW_nprocs": "App size\n(nprocs)", "POSIX_RAW_total_bytes": "Volume [GB]", "POSIX_RAW_total_files": "files\n(count)"}, inplace=True)
    # Preprocess the percentage table data
    perc_data = df[perc_columns].copy()
    perc_data.rename(columns={"POSIX_BYTES_READ_PERC": "Read ratio\n(by volume)", 'POSIX_read_only_bytes_perc': "RO files\n(by file #)", 'POSIX_read_write_bytes_perc': "R/W files\n(by file #)", 'POSIX_write_only_bytes_perc': "WO files\n(by file #)", "POSIX_unique_files_perc": "Unique files\n(by file #)"}, inplace=True)
    # Title: cluster name and number of rows in df
    log_ax.set_title("Cluster {}:\n {} jobs".format(name, np.sum(df.shape[0])))
    # First, plot the logarithmic plot.
    # The sample size is rows // 10, so fewer than 10 rows yields an empty sample.
    parallel_coordinates(log_data.sample(n=int(log_data.shape[0] / 10)), "apps_short", ax=log_ax, sort_labels=True, alpha=0.1)
    # for lh in log_ax.get_legend().legendHandles: lh.set_alpha(1) # noqa: E701
    log_ax.get_legend().remove()
    log_ax.set_yscale('log')
    log_ax.grid(True)
    #
    log_ax.yaxis.set_minor_formatter(ticker.ScalarFormatter())
    log_ax.set_xticklabels(log_ax.get_xticklabels(), rotation=37, ha="right", rotation_mode="anchor")
    log_ax.set_ylim(10**0, 10**6)
    # Then the percentage plot
    parallel_coordinates(perc_data.sample(n=int(perc_data.shape[0] / 10)), "apps_short", ax=perc_ax, sort_labels=True, alpha=0.1)
    # for lh in perc_ax.get_legend().legendHandles: lh.set_alpha(1) # noqa: E701
    perc_ax.get_legend().remove()
    perc_ax.grid(True)
    # plt.yticks acts on the current axes rather than on perc_ax explicitly
    plt.yticks([0, 0.25, 0.5, 0.75, 1.0], ["0%", "25%", "50%", "75%", "100%"])
    perc_ax.yaxis.set_minor_formatter(ticker.ScalarFormatter())
    perc_ax.set_xticklabels(perc_ax.get_xticklabels(), rotation=37, ha="right", rotation_mode="anchor")
    # Shift labels: no horizontal offset, 5/72 vertical units of the `dpi` transform
    dx = 0; dy = 5/72.
    offset = matplotlib.transforms.ScaledTranslation(dx, dy, dpi)
    # apply offset transform to all x ticklabels.
    for label in log_ax.xaxis.get_majorticklabels():
        label.set_transform(label.get_transform() + offset)
    for label in perc_ax.xaxis.get_majorticklabels():
        label.set_transform(label.get_transform() + offset)
def display_k_centers(df, cluster_centers, cluster_labels):
    """Show the two cluster centres as a parallel-coordinates plot.

    ``cluster_centers`` is labelled with the column names of ``df`` and the
    row labels 'Means1' and 'Means2'; a 'cluster' column with the values 1
    and 2 is added as the class column. ``cluster_labels`` is not used.
    """
    feature_names = list(df.columns)
    centers_df = pd.DataFrame(cluster_centers, index=['Means1', 'Means2'],
                              columns=feature_names)
    centers_df['cluster'] = [1, 2]

    plt.figure(figsize=(7, 5))
    plt.title('Clusters 1 and 2 means along 5 terms')
    parallel_coordinates(centers_df, 'cluster', color=['blue', 'red'],
                         marker='o')
    plt.show()
def plot_gender_parallel_coordinates():
    """Show a parallel-coordinates plot of the module-level ``df`` by "Gender".

    The 'CustomerID' column is dropped and rows are ordered by 'Gender'
    (ascending) and then 'Annual_Income' (descending) before plotting.
    """
    # Lightish blue and orange
    colors = ('#1f77b4', '#ff7f0e')

    plt.figure(figsize=(12, 8))
    without_id = df.drop('CustomerID', axis=1)
    ordered = without_id.sort_values(by=['Gender', 'Annual_Income'],
                                     ascending=[True, False])
    parallel_coordinates(ordered, "Gender", color=colors)

    plt.title('Parallel Coords for Gender', fontsize=18)
    plt.tight_layout()
    plt.show()
def parallelPlot(colList, data):
    """Show a parallel-coordinates plot of selected columns grouped by 'churn'.

    The columns named in ``colList`` are copied from ``data`` into a new
    frame, which is plotted with 'churn' as the class column.

    Plotting is best effort: any exception is caught and reported on standard
    output (exception type and arguments) instead of being raised.

    :param colList: column names to plot.
    :param data: mapping or DataFrame indexed by those column names.
    """
    try:
        td = pd.DataFrame(columns=colList)
        for col in colList:
            td[col] = data[col]
        pp.parallel_coordinates(td, 'churn', color=('#556270', '#4ECDC4'))
        plt.show()
    except Exception as ex:
        # print() calls instead of Python 2 print statements, which are a
        # SyntaxError on Python 3.
        print("-----------------------------------------------------------------------")
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
def add_reference_potentials_to_plot(self, ax, qoi_names,
                                     reference_data_marker_size=300,
                                     reference_data_marker_type='|',
                                     reference_data_colors=None):
    """Draw every reference potential as a parallel-coordinates line on ``ax``.

    Marker size and type are stored on ``self`` when they are not None.
    Colours are resolved as follows and stored in
    ``self.reference_data_colors``:

    * dict  -- deep-copied as given;
    * list  -- paired by position with the reference potential names;
    * None  -- ``self.reference_data_colors_default`` paired by position;
    * any other type -- the attribute is left as it was.

    Each entry of ``qoi_names`` is mapped to a column name: ``<q>.nerr`` when
    it is one of the configuration's ``qoi_names``, ``<q>.nerr_v`` when it is
    one of its ``qoi_validation_names``, otherwise the name itself.

    :param ax: axes to draw on; forwarded to ``parallel_coordinates``.
    :param qoi_names: names selecting the plotted columns.
    :param reference_data_marker_size: stored on ``self`` unless None.
    :param reference_data_marker_type: stored on ``self`` unless None.
    :param reference_data_colors: dict, list or None, see above.
    :raises IndexError: if a positional colour list is shorter than the list
        of reference potentials.
    """
    potential_names = list(self.configuration.reference_potentials)

    if reference_data_marker_size is not None:
        self.reference_data_marker_size = reference_data_marker_size
    if reference_data_marker_type is not None:
        self.reference_data_marker_type = reference_data_marker_type

    if isinstance(reference_data_colors, dict):
        self.reference_data_colors = copy.deepcopy(reference_data_colors)
    elif reference_data_colors is None or isinstance(reference_data_colors, list):
        if reference_data_colors is None:
            palette = self.reference_data_colors_default
        else:
            palette = list(reference_data_colors)
        # Index explicitly so a too-short palette raises instead of silently
        # leaving potentials without a colour.
        self.reference_data_colors = OrderedDict(
            (potential, palette[position])
            for position, potential in enumerate(potential_names)
        )

    marker_color = self.reference_data_colors

    col_names = []
    for qoi in qoi_names:
        if qoi in self.configuration.qoi_names:
            col_names.append('{}.nerr'.format(qoi))
        elif qoi in self.configuration.qoi_validation_names:
            col_names.append('{}.nerr_v'.format(qoi))
        else:
            col_names.append(qoi)

    for potential in potential_names:
        rows = self.df.loc[self.df['sim_id'].isin([potential])]
        # Pass ax through: without it the lines were drawn on whatever axes
        # happened to be current rather than on the axes the caller supplied.
        parallel_coordinates(rows, 'sim_id', axvlines=False, cols=col_names,
                             color=marker_color[potential], ax=ax)
def test_get_standard_colors_random_seed(self):
    """Plotting must not reseed ``random``; random colours must be repeatable."""
    # GH17525
    frame = DataFrame(np.zeros((10, 10)))

    # Make sure that the random seed isn't reset by _get_standard_colors:
    # a draw taken after each plot call must differ from the previous one.
    draws = []
    for _ in range(2):
        plotting.parallel_coordinates(frame, 0)
        draws.append(random.random())
    rand1, rand2 = draws
    assert rand1 != rand2

    # Make sure it produces the same colors every time it's called
    from pandas.plotting._style import _get_standard_colors
    color1, color2 = (_get_standard_colors(1, color_type='random'),
                      _get_standard_colors(1, color_type='random'))
    assert color1 == color2
def test_parallel_coordinates(self):
    """Check colours, ``axvlines`` and deprecated keywords on ``self.iris``.

    Plots the iris frame with hex colours, named colours, a colormap and
    without vertical axis lines, then uses a three-row frame to check legend
    handle colours and the FutureWarning for ``data=`` and ``colors=``.
    """
    from pandas.plotting import parallel_coordinates
    from matplotlib import cm

    iris = self.iris

    def render(**options):
        # Shared call: same frame and class column for every iris plot.
        return _check_plot_works(parallel_coordinates, frame=iris,
                                 class_column='Name', **options)

    ax = render()
    nlines = len(ax.get_lines())
    nxticks = len(ax.xaxis.get_ticklabels())

    rgba = ('#556270', '#4ECDC4', '#C7F464')
    ax = render(color=rgba)
    self._check_colors(ax.get_lines()[:10], linecolors=rgba,
                       mapping=iris['Name'][:10])

    cnames = ['dodgerblue', 'aquamarine', 'seagreen']
    ax = render(color=cnames)
    self._check_colors(ax.get_lines()[:10], linecolors=cnames,
                       mapping=iris['Name'][:10])

    ax = render(colormap=cm.jet)
    cmaps = lmap(cm.jet, np.linspace(0, 1, iris['Name'].nunique()))
    self._check_colors(ax.get_lines()[:10], linecolors=cmaps,
                       mapping=iris['Name'][:10])

    # One vertical line per x tick disappears when axvlines is off.
    ax = render(axvlines=False)
    assert len(ax.get_lines()) == (nlines - nxticks)

    colors = ['b', 'g', 'r']
    small = DataFrame({"A": [1, 2, 3],
                       "B": [1, 2, 3],
                       "C": [1, 2, 3],
                       "Name": colors})
    ax = parallel_coordinates(small, 'Name', color=colors)
    handles, labels = ax.get_legend_handles_labels()
    self._check_colors(handles, linecolors=colors)

    with tm.assert_produces_warning(FutureWarning):
        parallel_coordinates(data=small, class_column='Name')
    with tm.assert_produces_warning(FutureWarning):
        parallel_coordinates(small, 'Name', colors=colors)
def test_parallel_coordinates_with_sorted_labels(self):
    """ For #15908

    With ``sort_labels=True`` the legend entries, once ordered by label,
    must have strictly increasing labels and strictly increasing colours.
    """
    from pandas.plotting import parallel_coordinates

    df = DataFrame({"feat": list(range(30)),
                    "class": [2] * 10 + [3] * 10 + [1] * 10})
    ax = parallel_coordinates(df, 'class', sort_labels=True)
    polylines, labels = ax.get_legend_handles_labels()

    line_colors = [polyline.get_color() for polyline in polylines]
    by_label = sorted(zip(line_colors, labels), key=lambda pair: pair[1])

    # Compare each (colour, label) pair with its successor.
    for prev, nxt in zip(by_label[:-1], by_label[1:]):
        # labels and colors are ordered strictly increasing
        assert prev[1] < nxt[1] and prev[0] < nxt[0]
index="PERSON", columns="YEAR", values="TEXT", aggfunc="count").reset_index(level=0) # mask names list you want to display in the plot friends = [] temp = yearly_parallel.loc[yearly_parallel.PERSON.isin( friends), ["PERSON"] + list(range(2012, 2019))] color_sequence = [ '#1f77b4', '#dbdb8d', '#ff7f0e', '#27ae60', '#9edae5', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#ffbb78', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#aec7e8', '#2ca02c', '#e377c2' ] fig, ax = plt.subplots(figsize=(15, 6)) parallel_coordinates(temp, "PERSON", ax=ax, lw=2.5, color=color_sequence[:len(friends)]) ax.spines['top'].set_visible(True) ax.spines['bottom'].set_visible(False) ax.spines['right'].set_visible(True) ax.spines['left'].set_visible(False) ax.set_title("Text count based yearly ranking", loc="left") filepath = plot_dir / "top_n_yearly_length.jpeg" fig.savefig(filepath.as_posix(), bbox_inches='tight') plt.close(fig) print("Saved the plot", filepath) #
annot=True ) plt.show() # In[23]: import pandas as pd from pandas.plotting import parallel_coordinates p = (pokemon[(pokemon['Type 1'].isin(["Psychic", "Fighting"]))] .loc[:, ['Type 1', 'Attack', 'Sp. Atk', 'Defense', 'Sp. Def']] ) parallel_coordinates(p, 'Type 1') plt.show() # ### 非常实用的方法是将Seaborn的分类图分为三类,将分类变量每个级别的每个观察结果显示出来,显示每个观察分布的抽象表示,以及应用统计估计显示的权重趋势和置信区间: # # - 第一个包括函数swarmplot()和stripplot() # - 第二个包括函数boxplot()和violinplot() # - 第三个包括函数barplot()和pointplt() # In[8]: sns.swarmplot(x='Generation',y='Defense',hue='Legendary',data = pokemon) plt.show() sns.stripplot(x='Generation',y='Defense',hue='Legendary',data = pokemon)