def plot_parallel(polt_data, m, name):
    """Show a parallel-coordinates plot of ``polt_data`` on ``m`` axes.

    The data is loaded into a DataFrame whose columns are labelled
    ``1..m``. A constant column ``'0'`` (value 1 for every row) is added
    and used as the class column. The title reports ``name`` and the
    number of points.
    """
    axis_labels = list(range(1, m + 1))
    frame = pd.DataFrame(data=polt_data, columns=axis_labels)
    # parallel_coordinates requires a class column; give every row class 1.
    frame['0'] = pd.Series(1, index=frame.index)

    plt.figure(figsize=(12, 9), dpi=200)
    point_count = len(polt_data)
    plt.title('%s point number: %d' % (name, point_count))
    parallel_coordinates(frame, class_column='0')
    plt.show()
Ejemplo n.º 2
0
def parallelPlot(df,
                 yearI,
                 yearF,
                 target='Años',
                 xlabel='Topics',
                 ylabel='Valoración promedio',
                 title='Valoración por año'):
    """Plot the average ``overall`` value per year as parallel coordinates.

    ``df`` is passed through ``expandReviewTime`` and pivoted so that,
    after transposing, each row is one 'año' value and each column one
    'maximos' value. The row labels are copied into a ``target`` column,
    which is used as the class column. Rows labelled ``" <year>"`` for
    every year in ``range(yearI, yearF)`` are dropped before plotting.
    """
    expanded = expandReviewTime(df)
    yearly_means = pd.pivot_table(expanded,
                                  values='overall',
                                  index=['maximos'],
                                  columns=['año'],
                                  aggfunc=np.average).transpose()
    yearly_means[target] = yearly_means.index

    # The labels to drop are built with a leading space before the year.
    excluded_labels = []
    for year in range(yearI, yearF):
        excluded_labels.append(" " + str(year))
    yearly_means = yearly_means.drop(excluded_labels)

    plt.figure()
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.title(title)
    parallel_coordinates(yearly_means, target, colormap='gist_rainbow')
    plt.show()
Ejemplo n.º 3
0
    def cluster(self):
        """Save one parallel-coordinates figure per hard-coded row group.

        Rows are read from "correct_file_characterristic1" via
        ``self.read_csv``. For each selected row, the first 240 values are
        split into 24 consecutive groups of 10; a group contributes 1 when
        its sum is positive and -1 otherwise. The resulting frame (columns
        0..23) is plotted with column 0 as the class column and written to
        ``myresult/<n>.svg``, where ``n`` counts the groups from 0.
        """
        frame = self.read_csv("correct_file_characterristic1")
        row_groups = [[127], [558], [662], [691], [793], [802], [834], [876], [1019]]

        for figure_no, row_ids in enumerate(row_groups):
            signs = {bucket: [] for bucket in range(24)}

            for row_id in row_ids:
                values = np.array(frame.iloc[[row_id]]).tolist()[0]
                for bucket in range(24):
                    start = bucket * 10
                    total = sum(values[start + offset] for offset in range(10))
                    signs[bucket].append(1 if total > 0 else -1)

            plot_frame = pd.DataFrame(signs)
            parallel_coordinates(plot_frame, 0, color="red")
            plt.savefig("myresult/" + str(figure_no) + ".svg")
            plt.close()
Ejemplo n.º 4
0
    def test_parallel_coordinates(self, iris):
        """Exercise ``parallel_coordinates`` colour handling on the iris fixture.

        Covers explicit hex colours, named colours, a colormap, the
        ``axvlines=False`` option, legend handle colours, and the
        FutureWarning raised for the ``data=`` and ``colors=`` keywords.
        """
        from pandas.plotting import parallel_coordinates
        from matplotlib import cm

        df = iris

        # Baseline plot: remember how many lines and x tick labels it has so
        # the axvlines=False case below can be compared against it.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column="Name")
        nlines = len(ax.get_lines())
        nxticks = len(ax.xaxis.get_ticklabels())

        # Explicit hex colours, checked on the first 10 lines.
        rgba = ("#556270", "#4ECDC4", "#C7F464")
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column="Name",
                               color=rgba)
        self._check_colors(ax.get_lines()[:10],
                           linecolors=rgba,
                           mapping=df["Name"][:10])

        # Named colours.
        cnames = ["dodgerblue", "aquamarine", "seagreen"]
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column="Name",
                               color=cnames)
        self._check_colors(ax.get_lines()[:10],
                           linecolors=cnames,
                           mapping=df["Name"][:10])

        # Colormap: expected colours are cm.jet sampled evenly over [0, 1],
        # one sample per distinct class name.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column="Name",
                               colormap=cm.jet)
        cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())]
        self._check_colors(ax.get_lines()[:10],
                           linecolors=cmaps,
                           mapping=df["Name"][:10])

        # Without vertical axis lines the axes must hold exactly nxticks
        # fewer lines than the baseline plot.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column="Name",
                               axvlines=False)
        assert len(ax.get_lines()) == (nlines - nxticks)

        # Legend handles must carry the colours passed in.
        colors = ["b", "g", "r"]
        df = DataFrame({
            "A": [1, 2, 3],
            "B": [1, 2, 3],
            "C": [1, 2, 3],
            "Name": colors
        })
        ax = parallel_coordinates(df, "Name", color=colors)
        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, linecolors=colors)

        # The `data=` and `colors=` keywords are expected to emit FutureWarning.
        with tm.assert_produces_warning(FutureWarning):
            parallel_coordinates(data=df, class_column="Name")
        with tm.assert_produces_warning(FutureWarning):
            parallel_coordinates(df, "Name", colors=colors)
Ejemplo n.º 5
0
    def test_parallel_coordinates(self, iris):
        """Exercise ``parallel_coordinates`` colour handling on the iris fixture.

        Covers explicit hex colours, named colours, a colormap, the
        ``axvlines=False`` option, legend handle colours, and the
        FutureWarning raised for the ``data=`` and ``colors=`` keywords.
        """
        from pandas.plotting import parallel_coordinates
        from matplotlib import cm

        df = iris

        # Baseline plot: remember how many lines and x tick labels it has so
        # the axvlines=False case below can be compared against it.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column='Name')
        nlines = len(ax.get_lines())
        nxticks = len(ax.xaxis.get_ticklabels())

        # Explicit hex colours, checked on the first 10 lines.
        rgba = ('#556270', '#4ECDC4', '#C7F464')
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column='Name',
                               color=rgba)
        self._check_colors(ax.get_lines()[:10],
                           linecolors=rgba,
                           mapping=df['Name'][:10])

        # Named colours.
        cnames = ['dodgerblue', 'aquamarine', 'seagreen']
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column='Name',
                               color=cnames)
        self._check_colors(ax.get_lines()[:10],
                           linecolors=cnames,
                           mapping=df['Name'][:10])

        # Colormap: expected colours are cm.jet applied (via lmap) to evenly
        # spaced points in [0, 1], one per distinct class name.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column='Name',
                               colormap=cm.jet)
        cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
        self._check_colors(ax.get_lines()[:10],
                           linecolors=cmaps,
                           mapping=df['Name'][:10])

        # Without vertical axis lines the axes must hold exactly nxticks
        # fewer lines than the baseline plot.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df,
                               class_column='Name',
                               axvlines=False)
        assert len(ax.get_lines()) == (nlines - nxticks)

        # Legend handles must carry the colours passed in.
        colors = ['b', 'g', 'r']
        df = DataFrame({
            "A": [1, 2, 3],
            "B": [1, 2, 3],
            "C": [1, 2, 3],
            "Name": colors
        })
        ax = parallel_coordinates(df, 'Name', color=colors)
        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, linecolors=colors)

        # The `data=` and `colors=` keywords are expected to emit FutureWarning.
        with tm.assert_produces_warning(FutureWarning):
            parallel_coordinates(data=df, class_column='Name')
        with tm.assert_produces_warning(FutureWarning):
            parallel_coordinates(df, 'Name', colors=colors)
Ejemplo n.º 6
0
def parallel_plot(data):
    """Draw ``data`` as parallel coordinates, classed by 'prediction'.

    Colours repeat through b, r, g, y, k, one per row of ``data``. The
    plot goes on a new 15x8 figure whose y-axis is fixed to [-3, 3].
    """
    colour_cycle = cycle(['b', 'r', 'g', 'y', 'k'])
    my_colors = list(islice(colour_cycle, None, len(data)))

    figure = plt.figure(figsize=(15, 8))
    figure.gca().axes.set_ylim([-3, +3])

    parallel_coordinates(data, 'prediction', color=my_colors, marker='o')
Ejemplo n.º 7
0
def parallel_plot(data, rg):
    """
    Plot the parallel plots for k-means class representatives.

    ``rg`` is passed to ``set_ylim`` as the y-axis limits of a new 18x8
    figure. Rows are classed by the "prediction" column and coloured by
    cycling through b, r, g, y, k.
    """
    colour_cycle = cycle(["b", "r", "g", "y", "k"])
    my_colors = list(islice(colour_cycle, None, len(data)))
    figure = plt.figure(figsize=(18, 8))
    figure.gca().axes.set_ylim(rg)
    parallel_coordinates(data, "prediction", color=my_colors, marker="o")
def display_parallel_coordinates(df, num_clusters):
    '''Show one parallel-coordinates subplot per cluster and return the figure.

    Each subplot draws every other cluster first with alpha 0.2 and then
    the subplot's own cluster with alpha 0.5, so the highlighted cluster
    is drawn last. Colours come from the module-level ``palette`` via
    ``addAlpha``.
    '''
    per_cluster = [df[df.cluster == label] for label in range(num_clusters)]

    fig = plt.figure(figsize=(12, 15))
    fig.suptitle("Parallel Coordinates Plot for the Clusters", fontsize=18)
    fig.subplots_adjust(top=0.95, wspace=0)

    for focus in range(num_clusters):
        plt.subplot(num_clusters, 1, focus + 1)

        # Background: all clusters except the one this subplot highlights.
        for other, rows in enumerate(per_cluster):
            if other == focus:
                continue
            parallel_coordinates(rows,
                                 'cluster',
                                 color=[addAlpha(palette[other], 0.2)])

        # Foreground: the highlighted cluster, drawn last.
        parallel_coordinates(per_cluster[focus],
                             'cluster',
                             color=[addAlpha(palette[focus], 0.5)])

        # Push every second x tick label down so neighbours do not collide.
        axes = plt.gca()
        for tick in axes.xaxis.get_major_ticks()[1::2]:
            tick.set_pad(20)

    plt.show()
    return fig
def plot_parallel_coordinates(
        df, col_name, sample_size=0.1,
        scale_cols=True,
        figsize=(20, 10),
):
    """
    Take a sample from df, convert to Pandas and do on it a parallel coordinates plot of
    the numeric columns, grouped by col_name. Intended to see how the features correlate
    with the label and/or whether some features can separate the label values.
    :param df: spark dataframe.
    :param col_name: (string) class column; it is passed to ``parallel_coordinates`` as the
        class column and is never rescaled.
    :param sample_size: (float) proportion of the data to sample for the plot.
    :param scale_cols: (bool) Whether to scale the features to the [0., 1.] interval.
        Constant columns are mapped to 0 instead of NaN.
    :param figsize: (tuple)
    :return: None
    """
    # Copy so the list handed back by list_numeric_features is never mutated.
    selected = list(list_numeric_features(df))
    if col_name not in selected:
        selected.append(col_name)
    sampled_df = df.select(selected).sample(False, sample_size).toPandas()

    if scale_cols:
        for name in sampled_df.columns:
            if name == col_name:
                continue
            shifted = sampled_df[name] - sampled_df[name].min()
            span = shifted.max()
            # A constant column has span 0; dividing would fill it with NaN
            # and the feature would vanish from the plot. Keep it at 0.
            sampled_df[name] = shifted / span if span != 0 else shifted

    plt.figure(figsize=figsize)
    parallel_coordinates(sampled_df, col_name)
    plt.xticks(rotation=90)
Ejemplo n.º 10
0
def demonstrate(wines):
    """Show a quality count plot and a parallel-coordinates plot for ``wines``.

    The first figure counts rows per "quality", split by "wine_type". The
    second standardises four attributes with ``StandardScaler`` and plots
    them as parallel coordinates, classed by 'wine_type'.
    """
    sns.countplot(x="quality",
                  hue="wine_type",
                  data=wines,
                  palette={
                      "red": "#FF9999",
                      "white": "#FFE888"
                  })
    plt.show()

    # demonstrate some attributes for both red and white wine
    cols = [
        'density', 'residual sugar', 'total sulfur dioxide', 'fixed acidity'
    ]
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(wines[cols])
    # Reuse the original index: concat(axis=1) aligns on index labels, so a
    # fresh RangeIndex would pair scaled rows with the wrong wine_type (or
    # with NaN) whenever `wines` does not have a default 0..n-1 index.
    scaled_df = pd.DataFrame(scaled_values, columns=cols, index=wines.index)
    final_df = pd.concat([scaled_df, wines['wine_type']], axis=1)

    # plot parallel coordinates
    plt.figure()
    parallel_coordinates(final_df, 'wine_type', color=('#FFE888', '#FF9999'))
    plt.show()
Ejemplo n.º 11
0
 def make_parallel_coordinates(self):
     """Save a parallel-coordinates plot of the transformed data.

     The transformed data is joined column-wise with the training target
     (its index reset to 0..n-1) and plotted with the loader's outputs
     unpacked as the remaining positional arguments. The figure is written
     to ``<self.dir_>/Visual/transform_parallel_coordinates.png`` and closed.
     """
     figure, _ = plt.subplots()
     features = pd.DataFrame(self.transform.data)
     labels = self.transform.load.partition.y_train.reset_index(drop=True)
     combined = pd.concat([features, labels], axis=1, sort=False)
     parallel_coordinates(combined, *self.transform.load.outputs)
     plt.savefig(
         f"{self.dir_}/Visual/transform_parallel_coordinates.png",
         bbox_inches='tight',
     )
     plt.close(figure)
Ejemplo n.º 12
0
def plot_parallel_coordinates(data):
    """Show a parallel-coordinates plot of ``data``, classed by 'Class'.

    Features and labels come from ``extract_features`` and
    ``extract_labels`` and are joined column-wise before plotting.
    """
    feature_part = extract_features(data)
    label_part = extract_labels(data)
    combined = pd.concat([feature_part, label_part], axis=1)

    parallel_coordinates(combined, 'Class')
    plt.show()
Ejemplo n.º 13
0
def parallel_plot(df, label):
    """Draw ``df`` as parallel coordinates on a new 10x6 figure.

    ``label`` names the class column. Lines use red, green and blue with
    circle markers, and tick labels are rotated by 20 degrees.
    """
    plt.figure(figsize=(10, 6))
    plt.title('Clusters of iris')
    plt.xlabel('Flower Features')
    plt.ylabel('Standard Deviations')
    plt.tick_params(labelrotation=20)
    parallel_coordinates(df, label, color=['r', 'g', 'b'], marker='o')
def plot_parallel(df):
    """Show a parallel-coordinates plot of ``df``, classed by 'dataset'.

    The first index label of ``df`` is used both in the title and as the
    y-axis label.
    """
    plt.figure(figsize=(12, 6))
    heading = df.index[0] + ' score of different algorithms'
    plt.title(heading, fontsize=15)
    parallel_coordinates(df, 'dataset')
    plt.grid(lw=0.1)
    plt.legend(loc=4)
    y_label = df.index[0]
    plt.ylabel(y_label, fontsize=14)
    plt.show()
Ejemplo n.º 15
0
def plot_parallel_coordinates(plotdata, label):
    """Print ``plotdata`` and show it as parallel coordinates, classed by 'label'.

    ``label`` is stored in a 'label' column of ``plotdata`` itself, so the
    caller's object is modified. After the plot is shown, the length of
    ``plotdata[0]`` is printed.
    """
    plotdata['label'] = label
    print(plotdata)

    plt.figure(figsize=(100, 50), dpi=20)
    parallel_coordinates(plotdata, 'label')
    plt.show()

    first_entry_length = len(plotdata[0])
    print(first_entry_length)
Ejemplo n.º 16
0
def parallel_plot(data):
    """Draw ``data`` as parallel coordinates, classed by 'prediction'.

    Colours repeat through b, r, g, y, k, one per row of ``data``. The
    plot goes on a new 15x8 figure whose y-axis is fixed to [-2.5, 2.5].
    """
    from itertools import cycle, islice
    from pandas.plotting import parallel_coordinates
    import matplotlib.pyplot as plt

    base_colours = ['b', 'r', 'g', 'y', 'k']
    my_colors = list(islice(cycle(base_colours), None, len(data)))

    figure = plt.figure(figsize=(15, 8))
    figure.gca().axes.set_ylim([-2.5, +2.5])
    parallel_coordinates(data, 'prediction', color=my_colors, marker='o')
Ejemplo n.º 17
0
def parallel_coordinates_data_visualization(dataset_df):
    """Show ``dataset_df`` as a parallel-coordinates plot, classed by "class".

    Adds a title, axis labels and a framed, shadowed legend at location 1.
    """
    plt.figure(figsize=(10, 8))
    parallel_coordinates(dataset_df, "class")

    plt.title('Parallel Coordinates Plot', fontsize=15, fontweight='bold')
    plt.xlabel('Features', fontsize=15)
    plt.ylabel('Features values', fontsize=15)

    legend_options = dict(
        loc=1,
        prop={'size': 15},
        frameon=True,
        shadow=True,
        facecolor="white",
        edgecolor="black",
    )
    plt.legend(**legend_options)
    plt.show()
Ejemplo n.º 18
0
 def make_parallel_coordinates(self):
     """Save a parallel-coordinates plot of the transformed data.

     Columns are named after ``self.transform.load.inputs`` and
     ``self.load.outputs`` is unpacked as the remaining positional
     arguments of ``parallel_coordinates``. The figure is written under
     ``Data/Visual/`` with ``self.transform`` formatted into the file name,
     then closed.
     """
     figure, _ = plt.subplots()
     frame = pd.DataFrame(self.transform.data,
                          columns=self.transform.load.inputs)
     parallel_coordinates(frame, *self.load.outputs)
     plt.savefig(
         f"Data/Visual/{self.transform}_parallel_coordinates.png",
         bbox_inches='tight',
     )
     plt.close(figure)
Ejemplo n.º 19
0
def parallel_plot(data, title):
    """Draw ``data`` as parallel coordinates under ``title``.

    Rows are classed by 'prediction' and coloured by cycling through
    r, g, b, y, k. The y-axis is fixed to [-3, 3] and tick labels are
    rotated by 20 degrees.
    """
    plt.figure(figsize=(10, 6))
    plt.ylim([-3, 3])
    plt.title(title)
    plt.xlabel('Weather Features')
    plt.ylabel('Standard Deviation')
    plt.tick_params(labelrotation=20)

    colour_cycle = cycle(['r', 'g', 'b', 'y', 'k'])
    my_colors = list(islice(colour_cycle, None, len(data)))
    parallel_coordinates(data, 'prediction', color=my_colors, marker='o')
def EDA_way_1():
    """Print summary tables and draw exploratory plots for the module-level ``df``.

    Prints info, describe, head, tail, dtypes and counts. It then draws
    histograms and box plots of the four measurement columns, a violin
    plot, a pair plot, a correlation heat map and a parallel-coordinates
    plot, each on its own figure. No figure is shown or saved here.
    """
    # DataFrame.info() writes its report itself and returns None, so
    # passing it to print() would emit a stray "None".
    print("Info")
    df.info()

    print("\n\n\nData Describe\n", df.describe())

    print("\n\n\nFirst 5 Row of Data\n", df.head())

    print("\n\n\nLast 5 Row of Data\n", df.tail())

    print("\n\n\nData Types\n", df.dtypes)

    print("\n\n\nCounting the number of rows\n", df.count())

    n_bins = 10
    # (column, histogram title) in the order the 2x2 grid is filled, row by row.
    measurements = [
        ('SepalLengthCm', 'Sepal Length'),
        ('SepalWidthCm', 'Sepal Width'),
        ('PetalLengthCm', 'Petal Length'),
        ('PetalWidthCm', 'Petal Width'),
    ]
    cn = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

    fig, axs = plt.subplots(2, 2)
    for ax, (column, heading) in zip(axs.flat, measurements):
        ax.hist(df[column], bins=n_bins)
        ax.set_title(heading)
    fig.tight_layout(pad=1.0)

    fig, axs = plt.subplots(2, 2)
    for ax, (column, _) in zip(axs.flat, measurements):
        sns.boxplot(x='Species', y=column, data=df, order=cn, ax=ax)
    # add some spacing between subplots
    fig.tight_layout(pad=1.0)

    # Axes-level plots draw on the current axes; without a new figure the
    # violin plot would be painted over the last box plot.
    plt.figure()
    sns.violinplot(x="Species",
                   y="PetalLengthCm",
                   data=df,
                   size=5,
                   order=cn,
                   palette='colorblind')

    sns.pairplot(df, hue="Species", height=2, palette='colorblind')

    # Correlation is only defined for the numeric columns; the string
    # Species column is excluded explicitly.
    plt.figure()
    corrmat = df.select_dtypes(include='number').corr()
    sns.heatmap(corrmat, annot=True, square=True)

    plt.figure()
    parallel_coordinates(df, "Species", color=['blue', 'red', 'green'])
Ejemplo n.º 21
0
def parallel_coordinate(df, label):
    """
    Show a parallel-coordinates plot of ``df``.

    :param df: Data frame
    :param label: Name of the class column
    :return: None
    """
    from pandas.plotting import parallel_coordinates as draw_parallel

    draw_parallel(df, label)
    plt.show()
Ejemplo n.º 22
0
    def snscord(self):
        """Show ``self.mainFrame`` as a parallel-coordinates plot.

        ``self.dataFrameSet()`` is called first. The last entry of
        ``self.Header`` is used as the class column and x tick labels are
        rotated by 90 degrees.
        """
        self.dataFrameSet()

        frame = self.mainFrame
        class_column = self.Header[-1]
        parallel_coordinates(frame, class_column)

        py.xticks(rotation=90)
        py.show()
Ejemplo n.º 23
0
def plot_bicluster(matrix, bicluster, name="Bicluster"):
    """Show the rows of a bicluster as a parallel-coordinates plot.

    The sub-matrix comes from ``get_bicluster``. Each row is its own class
    (its row index), the title includes ``bicluster.msr``, and the legend
    is cleared before the plot is shown.
    """
    frame = pd.DataFrame(get_bicluster(matrix, bicluster))
    frame["index"] = frame.index.values
    parallel_coordinates(frame, "index", linewidth=1.0)

    heading = name + "\nMean Squared Residue: " + str(bicluster.msr)
    plt.title(heading)
    plt.xlabel('Condition')
    plt.ylabel('Expression level')
    # Drop the legend object from the current axes.
    plt.gca().legend_ = None
    plt.show()
Ejemplo n.º 24
0
def draw_coord(coords):
    """Plot ``coords`` as parallel coordinates (class column 'rule'), save and show.

    All open figures are closed first. The image is written to
    'TEDS_basket_coordinate_plot.png' inside ``figures_path``.
    """
    plt.close("all")
    plt.figure(figsize=(12, 10), dpi=80)
    parallel_coordinates(coords, 'rule')
    # Replace the generated legend with an empty one.
    plt.legend([])
    plt.grid(True)
    plt.title('TEDS attributes parallel coordinates plot', fontsize=14)
    plt.tight_layout()

    output_file = os.path.join(figures_path, 'TEDS_basket_coordinate_plot.png')
    plt.savefig(output_file)
    plt.show()
Ejemplo n.º 25
0
def plot2d(antibody):
    """Save a parallel-coordinates plot and a 2-D scatter for ``antibody``.

    The columns selected by ``antibody.features`` are taken from the
    module-level ``df`` and joined with ``dfclass`` and ``dflabel``. Two
    PNG files are written, named 'parallel_' and 'scatter_' followed by
    the joined column names. The scatter uses the first two columns as x
    and y and colours points by the "class" column.

    Raises KeyError if a class value is not one of 'short_acting',
    'medium_acting' or 'Long_acting'.
    """
    dfplot = df.iloc[:, antibody.features]
    dfplot = pd.concat([dfplot, dfclass], axis=1, ignore_index=True)
    dfplot = pd.concat([dfplot, dflabel], axis=1, ignore_index=True)
    col = [str(x) for x in antibody.features]
    col.append("class_id")
    col.append("class")
    dfplot.columns = col
    xlabel = 'feature ' + col[0]
    ylabel = 'feature ' + col[1]
    plt.figure(figsize=(15, 10))
    parallel_coordinates(dfplot, "class")
    plt.title('Parallel Coordinates Plot', fontsize=20, fontweight='bold')
    plt.xlabel('Features', fontsize=15)
    plt.ylabel('Features values', fontsize=15)
    plt.legend(loc=1,
               prop={'size': 15},
               frameon=True,
               shadow=True,
               facecolor="white",
               edgecolor="black")
    filename = 'parallel_' + '_'.join(dfplot.columns) + '.png'
    plt.savefig(filename)

    clr = {
        'short_acting': "red",
        "medium_acting": "green",
        "Long_acting": "blue"
    }
    # Create plot
    fig = plt.figure()
    # `axisbg` was removed from matplotlib; `facecolor` is its replacement.
    ax = fig.add_subplot(1, 1, 1, facecolor="1.0")

    # One scatter call per class gives one legend entry per class instead
    # of one per point. Columns are read by position because the labels
    # are strings.
    for cl, group in dfplot.groupby("class", sort=False):
        ax.scatter(group.iloc[:, 0],
                   group.iloc[:, 1],
                   alpha=0.8,
                   c=clr[cl],
                   edgecolors='none',
                   s=30,
                   label=cl)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.title('2d feature distribution')
    plt.legend()
    filename = 'scatter_' + '_'.join(dfplot.columns) + '.png'
    fig.savefig(filename)  # save the figure to file
    plt.close(fig)
Ejemplo n.º 26
0
def plot_parallel_coordinates(gs, df, dpi, name=""):
    """
    Draw two stacked parallel-coordinates plots for one cluster inside ``gs``.

    The upper plot uses a log y-axis limited to [1, 1e6] and shows
    throughput, volume, runtime, accesses, process count and file count.
    The lower plot shows the percentage columns with ticks at 0-100%.
    Both are classed by "apps_short", drawn from a random sample of one
    tenth of the rows, and have their legends removed. ``dpi`` is passed
    to ``ScaledTranslation`` as the scaling transform for the label offset.
    """
    inner_grid = matplotlib.gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gs, hspace=0.8)
    log_ax  = plt.subplot(inner_grid[0])  # noqa: F821
    perc_ax = plt.subplot(inner_grid[1])  # noqa: F821

    log_columns = ["POSIX_RAW_agg_perf_by_slowest", "POSIX_RAW_total_bytes", "RAW_runtime", "POSIX_RAW_total_accesses", "RAW_nprocs", "POSIX_RAW_total_files", "apps_short"]
    perc_columns = ["POSIX_BYTES_READ_PERC", 'POSIX_read_only_bytes_perc', 'POSIX_read_write_bytes_perc', 'POSIX_write_only_bytes_perc', "POSIX_unique_files_perc", "apps_short"]

    log_labels = {
        "POSIX_RAW_agg_perf_by_slowest": "Throughput\n[MB/s]",
        "POSIX_RAW_total_accesses": "R/W accesses\n(in 1000s)",
        "RAW_runtime": "runtime [s]",
        "RAW_nprocs": "App size\n(nprocs)",
        "POSIX_RAW_total_bytes": "Volume [GB]",
        "POSIX_RAW_total_files": "files\n(count)",
    }
    perc_labels = {
        "POSIX_BYTES_READ_PERC": "Read ratio\n(by volume)",
        'POSIX_read_only_bytes_perc': "RO files\n(by file #)",
        'POSIX_read_write_bytes_perc': "R/W files\n(by file #)",
        'POSIX_write_only_bytes_perc': "WO files\n(by file #)",
        "POSIX_unique_files_perc": "Unique files\n(by file #)",
    }

    # Upper-plot data: rescale bytes by 1024**3 and accesses by 1024, then relabel.
    log_data = df[log_columns].copy()
    log_data["POSIX_RAW_total_bytes"] /= 1024 ** 3
    log_data["POSIX_RAW_total_accesses"] /= 1024
    log_data = log_data.rename(columns=log_labels)

    # Lower-plot data: only relabelled.
    perc_data = df[perc_columns].copy().rename(columns=perc_labels)

    job_count = np.sum(df.shape[0])
    log_ax.set_title("Cluster {}:\n {} jobs".format(name, job_count))

    # Upper plot, logarithmic y-axis.
    log_sample = log_data.sample(n=int(log_data.shape[0] / 10))
    parallel_coordinates(log_sample, "apps_short", ax=log_ax, sort_labels=True, alpha=0.1)
    log_ax.get_legend().remove()
    log_ax.set_yscale('log')
    log_ax.grid(True)
    log_ax.set_xticklabels(log_ax.get_xticklabels(), rotation=37, ha="right", rotation_mode="anchor")
    log_ax.set_ylim(10**0, 10**6)

    # Lower plot, percentages.
    perc_sample = perc_data.sample(n=int(perc_data.shape[0] / 10))
    parallel_coordinates(perc_sample, "apps_short", ax=perc_ax, sort_labels=True, alpha=0.1)
    perc_ax.get_legend().remove()
    perc_ax.grid(True)
    plt.yticks([0, 0.25, 0.5, 0.75, 1.0], ["0%", "25%", "50%", "75%", "100%"])
    perc_ax.yaxis.set_minor_formatter(ticker.ScalarFormatter())
    perc_ax.set_xticklabels(perc_ax.get_xticklabels(), rotation=37, ha="right", rotation_mode="anchor")

    # Apply the same (0, 5/72) translation to the x tick labels of both plots.
    dx = 0
    dy = 5/72.
    offset = matplotlib.transforms.ScaledTranslation(dx, dy, dpi)
    for axes in (log_ax, perc_ax):
        for label in axes.xaxis.get_majorticklabels():
            label.set_transform(label.get_transform() + offset)
def display_k_centers(df, cluster_centers, cluster_labels):
    """Show the two cluster centres as a parallel-coordinates plot.

    ``cluster_centers`` is wrapped in a frame indexed 'Means1'/'Means2'
    with the columns of ``df``, and a 'cluster' column (1, 2) is added as
    the class column. ``cluster_labels`` is not used.
    """
    feature_names = list(df.columns)
    centers_df = pd.DataFrame(cluster_centers,
                              index=['Means1', 'Means2'],
                              columns=feature_names)
    centers_df['cluster'] = [1, 2]

    plt.figure(figsize=(7, 5))
    plt.title('Clusters 1 and 2 means along 5 terms')
    plot_options = dict(color=['blue', 'red'], marker='o')
    parallel_coordinates(centers_df, 'cluster', **plot_options)
    plt.show()
Ejemplo n.º 28
0
def plot_gender_parallel_coordinates():
    """Show the module-level ``df`` as parallel coordinates, classed by "Gender".

    'CustomerID' is dropped and rows are sorted by 'Gender' ascending,
    then 'Annual_Income' descending, before plotting.
    """
    # Lightish blue and orange
    colors = ('#1f77b4', '#ff7f0e')

    plt.figure(figsize=(12, 8))
    ordered = df.drop('CustomerID', axis=1).sort_values(
        by=['Gender', 'Annual_Income'], ascending=[True, False])
    parallel_coordinates(ordered, "Gender", color=colors)
    plt.title('Parallel Coords for Gender', fontsize=18)
    plt.tight_layout()
    plt.show()
Ejemplo n.º 29
0
def parallelPlot(colList, data):
    """Show the ``colList`` columns of ``data`` as parallel coordinates.

    Rows are classed by the 'churn' column, so 'churn' must be in
    ``colList``. Plotting is best-effort: any exception is caught and
    reported on stdout instead of being raised. Returns None.
    """
    try:
        td = pd.DataFrame(columns=colList)
        for col in colList:
            td[col] = data[col]

        pp.parallel_coordinates(td, 'churn', color=('#556270', '#4ECDC4'))
        plt.show()
    except Exception as ex:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statements were a SyntaxError on Python 3.
        print("-----------------------------------------------------------------------")
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
Ejemplo n.º 30
0
    def add_reference_potentials_to_plot(self,
                                         ax,
                                         qoi_names,
                                         reference_data_marker_size=300,
                                         reference_data_marker_type='|',
                                         reference_data_colors=None):
        """Draw one parallel-coordinates line per reference potential on ``ax``.

        Args:
            ax: axes to draw on; None falls back to the current axes.
            qoi_names: names to plot. A name in ``configuration.qoi_names``
                maps to column '<name>.nerr', one in
                ``configuration.qoi_validation_names`` maps to
                '<name>.nerr_v', and any other name is used unchanged.
            reference_data_marker_size: stored on the instance when not
                None; not used by the drawing call in this method.
            reference_data_marker_type: stored on the instance when not
                None; not used by the drawing call in this method.
            reference_data_colors: dict keyed by potential name, or list
                in the order of ``configuration.reference_potentials``.
                None uses ``self.reference_data_colors_default``. Any
                other type leaves ``self.reference_data_colors`` untouched.
        """
        _default_data_colors = self.reference_data_colors_default

        _reference_potential_names = list(
            self.configuration.reference_potentials)

        if reference_data_marker_size is not None:
            self.reference_data_marker_size = reference_data_marker_size
        if reference_data_marker_type is not None:
            self.reference_data_marker_type = reference_data_marker_type
        if reference_data_colors is not None:
            if isinstance(reference_data_colors, dict):
                self.reference_data_colors = copy.deepcopy(
                    reference_data_colors)
            elif isinstance(reference_data_colors, list):
                _default_data_colors = list(reference_data_colors)
                self.reference_data_colors = OrderedDict()
                for i, v in enumerate(_reference_potential_names):
                    self.reference_data_colors[v] = _default_data_colors[i]
        else:
            self.reference_data_colors = OrderedDict()
            for i, v in enumerate(_reference_potential_names):
                self.reference_data_colors[v] = _default_data_colors[i]

        _marker_color = self.reference_data_colors

        _col_names = []
        for q in qoi_names:
            if q in self.configuration.qoi_names:
                _col_names.append('{}.nerr'.format(q))
            elif q in self.configuration.qoi_validation_names:
                _col_names.append('{}.nerr_v'.format(q))
            else:
                _col_names.append(q)

        for v in _reference_potential_names:
            _df = self.df.loc[self.df['sim_id'].isin([v])]
            # Pass `ax` through so the lines land on the caller's axes; it
            # was previously ignored and the current axes was used instead.
            parallel_coordinates(_df,
                                 'sim_id',
                                 ax=ax,
                                 axvlines=False,
                                 cols=_col_names,
                                 color=_marker_color[v])
Ejemplo n.º 31
0
    def test_get_standard_colors_random_seed(self):
        """Check plotting leaves the random seed alone and random colours repeat.

        Regression test for GH17525.
        """
        frame = DataFrame(np.zeros((10, 10)))

        # Two plot calls, each followed by a random draw: if plotting reset
        # the seed, both draws would be equal.
        draws = []
        for _ in range(2):
            plotting.parallel_coordinates(frame, 0)
            draws.append(random.random())
        assert draws[0] != draws[1]

        # The 'random' colour type must return the same colours on each call.
        from pandas.plotting._style import _get_standard_colors
        first = _get_standard_colors(1, color_type='random')
        second = _get_standard_colors(1, color_type='random')
        assert first == second
Ejemplo n.º 32
0
    def test_parallel_coordinates(self):
        """Exercise ``parallel_coordinates`` colour handling on ``self.iris``.

        Covers explicit hex colours, named colours, a colormap, the
        ``axvlines=False`` option, legend handle colours, and the
        FutureWarning raised for the ``data=`` and ``colors=`` keywords.
        """
        from pandas.plotting import parallel_coordinates
        from matplotlib import cm

        df = self.iris

        # Baseline plot: remember how many lines and x tick labels it has so
        # the axvlines=False case below can be compared against it.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df, class_column='Name')
        nlines = len(ax.get_lines())
        nxticks = len(ax.xaxis.get_ticklabels())

        # Explicit hex colours, checked on the first 10 lines.
        rgba = ('#556270', '#4ECDC4', '#C7F464')
        ax = _check_plot_works(parallel_coordinates,
                               frame=df, class_column='Name', color=rgba)
        self._check_colors(
            ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10])

        # Named colours.
        cnames = ['dodgerblue', 'aquamarine', 'seagreen']
        ax = _check_plot_works(parallel_coordinates,
                               frame=df, class_column='Name', color=cnames)
        self._check_colors(
            ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10])

        # Colormap: expected colours are cm.jet applied (via lmap) to evenly
        # spaced points in [0, 1], one per distinct class name.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df, class_column='Name', colormap=cm.jet)
        cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
        self._check_colors(
            ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10])

        # Without vertical axis lines the axes must hold exactly nxticks
        # fewer lines than the baseline plot.
        ax = _check_plot_works(parallel_coordinates,
                               frame=df, class_column='Name', axvlines=False)
        assert len(ax.get_lines()) == (nlines - nxticks)

        # Legend handles must carry the colours passed in.
        colors = ['b', 'g', 'r']
        df = DataFrame({"A": [1, 2, 3],
                        "B": [1, 2, 3],
                        "C": [1, 2, 3],
                        "Name": colors})
        ax = parallel_coordinates(df, 'Name', color=colors)
        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, linecolors=colors)

        # The `data=` and `colors=` keywords are expected to emit FutureWarning.
        with tm.assert_produces_warning(FutureWarning):
            parallel_coordinates(data=df, class_column='Name')
        with tm.assert_produces_warning(FutureWarning):
            parallel_coordinates(df, 'Name', colors=colors)
Ejemplo n.º 33
0
 def test_parallel_coordinates_with_sorted_labels(self):
     """ For #15908: with sort_labels=True, legend labels and colours increase together. """
     from pandas.plotting import parallel_coordinates
     # Classes appear in the order 2, 3, 1 (ten rows each).
     df = DataFrame({"feat": list(range(30)),
                     "class": [2] * 10 + [3] * 10 + [1] * 10})
     ax = parallel_coordinates(df, 'class', sort_labels=True)
     polylines, labels = ax.get_legend_handles_labels()

     # (colour, label) pairs ordered by label.
     pairs = sorted(
         ((line.get_color(), text) for line, text in zip(polylines, labels)),
         key=lambda pair: pair[1])

     # Every neighbouring pair must be strictly increasing in both label
     # and colour.
     for prev, nxt in zip(pairs[:-1], pairs[1:]):
         assert prev[1] < nxt[1] and prev[0] < nxt[0]
Ejemplo n.º 34
0
    index="PERSON", columns="YEAR", values="TEXT", aggfunc="count").reset_index(level=0)

# Names of the people to show in the plot. With an empty list the filter
# below selects no rows and no colours are passed.
friends = []
# Keep only the selected people, with the PERSON column and the year
# columns 2012..2018.
temp = yearly_parallel.loc[yearly_parallel.PERSON.isin(
    friends), ["PERSON"] + list(range(2012, 2019))]

# Fixed palette; the first len(friends) entries are used.
color_sequence = [
    '#1f77b4', '#dbdb8d', '#ff7f0e', '#27ae60', '#9edae5',
    '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
    '#8c564b', '#c49c94', '#ffbb78', '#f7b6d2', '#7f7f7f',
    '#c7c7c7', '#bcbd22', '#aec7e8', '#2ca02c', '#e377c2'
]

fig, ax = plt.subplots(figsize=(15, 6))
parallel_coordinates(temp, "PERSON", ax=ax, lw=2.5,
                     color=color_sequence[:len(friends)])

# Show only the top and right spines.
ax.spines['top'].set_visible(True)
ax.spines['bottom'].set_visible(False)
ax.spines['right'].set_visible(True)
ax.spines['left'].set_visible(False)
ax.set_title("Text count based yearly ranking", loc="left")


# Save under plot_dir, then close the figure.
filepath = plot_dir / "top_n_yearly_length.jpeg"
fig.savefig(filepath.as_posix(), bbox_inches='tight')
plt.close(fig)
print("Saved the plot", filepath)


#
Ejemplo n.º 35
0
    annot=True
)
plt.show()


# In[23]:


import pandas as pd
from pandas.plotting import parallel_coordinates

# Keep only the rows whose 'Type 1' is Psychic or Fighting, and only the
# type column plus the four attack/defense columns.
p = (pokemon[(pokemon['Type 1'].isin(["Psychic", "Fighting"]))]
         .loc[:, ['Type 1', 'Attack', 'Sp. Atk', 'Defense', 'Sp. Def']]
    )

# Parallel-coordinates plot of the selection, classed by 'Type 1'.
parallel_coordinates(p, 'Type 1')
plt.show()


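# ### A practical way to think about Seaborn's categorical plots is to split them into three groups: plots that show every observation at each level of the categorical variable, plots that show an abstract representation of each distribution of observations, and plots that apply a statistical estimate to show central tendency and a confidence interval:
# 
# - The first group includes the functions swarmplot() and stripplot()
# - The second group includes the functions boxplot() and violinplot()
# - The third group includes the functions barplot() and pointplot()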

# In[8]:


# Swarm plot of Defense per Generation, split by the Legendary column.
sns.swarmplot(x='Generation',y='Defense',hue='Legendary',data = pokemon)
plt.show()
# Strip plot of the same variables.
sns.stripplot(x='Generation',y='Defense',hue='Legendary',data = pokemon)