Esempio n. 1
0
    def plot_chemical_trajectory(self, environment, filename):
        """
        Plot the trajectory through chemical space and save it as a one-page PDF.

        The left panel is a scatter plot of the visited chemical state against
        the iteration index; the right panel is a count plot of the state
        indices.

        Parameters
        ----------
        environment : str
            the name of the environment for which the chemical space trajectory is desired
        filename : str
            path of the PDF file the figure is written to
        """
        chemical_state_trajectory = self.extract_state_trajectory(environment)

        # Number the distinct states in order of first appearance.  Going
        # through a set would give an arbitrary order, and a dict lookup avoids
        # a linear list.index() search for every trajectory entry.
        visited_states = list(dict.fromkeys(chemical_state_trajectory))
        state_to_index = {
            state: position for position, state in enumerate(visited_states)
        }

        state_trajectory = np.array(
            [state_to_index[state] for state in chemical_state_trajectory],
            dtype=float)

        with PdfPages(filename) as pdf:
            sns.set(font_scale=2)
            fig = plt.figure(figsize=(28, 12))
            plt.subplot2grid((1, 2), (0, 0))
            # x and y are passed by keyword: recent seaborn releases no longer
            # accept them positionally.
            sns.scatterplot(x=np.arange(len(state_trajectory)),
                            y=state_trajectory)
            # Label each integer index with the chemical state it stands for.
            plt.yticks(np.arange(len(visited_states)), visited_states)

            plt.title("Trajectory through chemical space in {}".format(environment))
            plt.xlabel("iteration")
            plt.ylabel("chemical state")
            plt.tight_layout()

            plt.subplot2grid((1, 2), (0, 1))
            sns.countplot(y=state_trajectory)

            pdf.savefig(fig)
            # Close this figure explicitly rather than "whatever is current".
            plt.close(fig)
Esempio n. 2
0
def map_plotter():
    """Plot a choropleth map with a per-district price scatterplot below it.

    The function takes no arguments; it reads these names from the enclosing
    module scope: ``avg_ward_prices``, ``avg_district_prices``,
    ``mapped_wards``, ``map_df``, ``curated_data``, ``highest_dist``,
    ``inputdict``, ``no_of_total_results`` and ``rm``.

    Steps, in order:

    1. create a two-row figure (map on top, scatterplot below);
    2. draw the ward map with a kernel density overlay, falling back to a
       plain map with a note when ``sns.kdeplot`` raises ``ValueError``;
    3. draw the per-district scatterplot with min/max range bars and one
       mean marker per district;
    4. annotate the figure with the search query and result summary;
    5. save the figure as JPEG and PDF under ``output/``;
    6. store the current time in the module-level ``timer2``.

    Progress messages are printed to stdout. Nothing is returned.
    """
    print('Plotting the map... ', end='')
    fig, ax = plt.subplots(2,1, figsize=(10.7, 8.3),gridspec_kw={'height_ratios':[2, 1]})
    fig.tight_layout(pad=1, w_pad=0.5, h_pad=0.5)
    fig.patch.set_facecolor('#f2f2f2')

    #   set colormap
    #   (no copy is made, so set_under modifies the object plt.cm.Reds refers to)
    cmap = plt.cm.Reds
    cmap.set_under(color='white')

    #   set min and max for the axes, adjust to create some space for legend etc.
    vmin = min(avg_ward_prices['price'])
    vmax = max(avg_ward_prices['price'])
    #   both lambdas ignore their argument: they round vmin / vmax to the
    #   nearest hundred and then push the bound outwards by 50 or 100
    xmin = lambda x: round(vmin, -2) - 100 if round(vmin, -2) > vmin else round(vmin, -2)-50
    xmax = lambda x: round(vmax, -2) + 100 if round(vmax, -2) <= vmax else round(vmax, -2)+50
    #   from here on xmin / xmax are plain numbers, no longer functions
    xmin = xmin(xmin)
    xmax = xmax(vmax)

    #   set limits on map axis (coordinates)
    ax[0].set_xlim([500000, 563000])
    ax[0].set_ylim([155000, 202000])
    #   remove axes
    ax[0].axis('off')

    #   use the ward results and obtain the centre of polygon for each ward
    #   (wards whose price is 0 are left out)
    map_pos_wards = mapped_wards[['DISTRICT', 'geometry', 'price']]
    map_pos_wards = map_pos_wards[map_pos_wards['price'] != 0]
    centrx = map_pos_wards['geometry'].apply(lambda c: c.centroid.x)
    centry = map_pos_wards['geometry'].apply(lambda c: c.centroid.y)

    #   set up a kernel density estimation plot masked by the ward boundaries
    j_map = cascaded_union(map_pos_wards['geometry'])
    j = ax[0].add_patch(PolygonPatch(j_map, fc='none', ec='#c1c1c1'))
    try:
        sns.kdeplot(centrx, centry, ax=ax[0], n_levels=len(map_pos_wards.index), cmap=cmap, shade=True, shade_lowest=False,
                    zorder=11, kernel='biw', gridsize=1000, alpha=1)
        #   clip every artist collection on the map axis to the merged ward outline
        for col in ax[0].collections:
            col.set_clip_path(j)
            #   plot map
        map_df.plot(linewidth=0.5, ax=ax[0], edgecolor='#c1c1c1', color='white')
        map_pos_wards.plot(linewidth=0, ax=ax[0], edgecolor='#c1c1c1', column=map_pos_wards['price'], vmin=xmin,
                           vmax=xmax, cmap=cmap, zorder=10)
        ax[0].add_patch(PolygonPatch(j_map, fc='none', ec='#c1c1c1'))
    except ValueError:
        #   fallback: the ValueError is treated as "only one result", so the map
        #   is drawn without the density overlay and a note is added instead
        print('Single result cannot be plotted on a kdeplot. Please broaden the search criteria.')
        #   NOTE(review): centrx[0] / centry[0] are label lookups; this assumes a
        #   row labelled 0 survived the price filter above - confirm
        ax[0].annotate('*', xy=(centrx[0], centry[0]), xycoords='data', horizontalalignment='left',
            verticalalignment='top', fontsize=8, color='black', zorder=11)
        ax[0].annotate('* Single result cannot be plotted on a kernel density estimation plot. Please broaden the search criteria.', xy=(0.25, 0.92), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top',
                       fontsize=8, color='#555555')
        map_df.plot(linewidth=0.5, ax=ax[0], edgecolor='#c1c1c1', color='white')
        map_pos_wards.plot(linewidth=0.8, ax=ax[0], edgecolor='#c1c1c1', column=map_pos_wards['price'], vmin=xmin,
                           vmax=xmax, cmap=cmap, zorder=10)
        ax[0].add_patch(PolygonPatch(j_map, color='gray', ec='#c1c1c1'))

    #   set up the colorbar
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=xmin, vmax=xmax))
    #   the mappable carries no data of its own; presumably the empty array is
    #   only there so that fig.colorbar accepts it - TODO confirm
    sm._A = []
    fig.colorbar(sm,format='£%.0f',ax=ax[0])
    print('Map plotted.\nCreating a scatter plot...', end='')

    #   scatterplot - sorted by average district price
    sns.set(style="whitegrid")
    selected_properties = curated_data[curated_data.district.isin(highest_dist)].sort_values(by='district')
    avg_d_for_sorting = selected_properties[['district','price']].groupby('district').mean()
    avg_d_for_sorting.columns = ['avgprice']
    selected_properties = pd.merge(selected_properties,avg_d_for_sorting,left_on='district',right_on=avg_d_for_sorting.index).sort_values(by='avgprice')

    #   created a simple formula for the marker size
    #   (|120 - 40 * distance|, truncated to an integer)
    plot_markersize = [abs(int(-40*a+120)) for a in selected_properties['distance']]
    #   no ax= is passed here or to the seaborn calls below, so they draw on the
    #   current axes - presumably ax[1], the last one created; verify
    sns.scatterplot(x="price", y="district", hue='district',
                  data=selected_properties,
                  alpha=0.6, edgecolor='none',zorder=4, s=plot_markersize)


    # adding range bars to the scatterplot (min and max) - it's actually two overlapping bars, one white one colored
    barplotdata = selected_properties[['district','price']]
    #   for every row: the lowest and the highest price within that row's district
    #   (the per-district filter is re-evaluated for each row)
    bpdata = [[barplotdata[barplotdata['district']==row['district']].min().values[1] for index,row in barplotdata.iterrows()],
              [barplotdata[barplotdata['district'] == row['district']].max().values[1] for index, row in
               barplotdata.iterrows()]]
    barplotdata.loc[:,'min'] = bpdata[0]
    barplotdata.loc[:,'max'] = bpdata[1]

    #   calculating the averages and ensuring each district has only 1 result (to prevent multiple circles on top of each other
    #   NOTE(review): .mean().values[0] is presumably the mean price - it depends
    #   on which columns pandas keeps when averaging the frame; confirm
    average_circles = [(row['district'], barplotdata[barplotdata['district'] == row['district']].mean().values[0]) for index, row in
               barplotdata.iterrows()]
    avgC = []
    #   keep the mean only on the last row of each run of equal districts and
    #   put NaN on the other rows
    for i in range(len(average_circles)):
        try:
            if average_circles[i][0] == average_circles[i+1][0]:
                avgC.append(np.nan)
            else:
                avgC.append(int(average_circles[i][1]))

        except:
            #   reached for the last row (there is no i+1 element) and for any
            #   other error raised above; only the last row keeps its mean
            if i+1 == len(average_circles):
                avgC.append(int(average_circles[i][1]))
            else:
                avgC.append(np.nan)
            pass
    barplotdata.loc[:,'avg'] = avgC

    #   legend for the scatterplot
    #   legend1 / legend2 are sized from the smallest / largest marker and are
    #   labelled below with the largest / smallest distance respectively
    legend1 = Line2D(range(1), range(1), linewidth=0, marker='o', markerfacecolor='gray',
                     markeredgewidth=0, markersize=min(plot_markersize) ** (1 / 2.0), alpha=0.6)
    legend2 = Line2D(range(1), range(1), linewidth=0, marker='o', markerfacecolor='gray',
                     markeredgewidth=0, markersize=max(plot_markersize) ** (1 / 2.0), alpha=0.6)
    legend3 = Line2D(range(1), range(1), linewidth=0, marker='o', markersize=5, markerfacecolor="white", fillstyle=None,
                     markeredgecolor='gray', markeredgewidth=0.5, alpha=0.5)
    legend4 = Line2D(range(1), range(1), color="#c6e2ff", linewidth=5, alpha=0.5)
    ax[1].legend((legend1, legend2, legend3, legend4), (
    "{0:.1f} miles\nfrom station".format(max(selected_properties['distance'])),
    "{0:.1f} miles\nfrom station".format(min(selected_properties['distance'])), 'District mean', 'Range'), numpoints=1,
                 loc=1, fontsize=8)

    # actual barplots and scatterplot
    #   the white 'min' bar (zorder 3) is drawn over the coloured 'max' bar (zorder 2)
    sns.barplot(x='max', y=barplotdata['district'], data=barplotdata,
                 label="Total", alpha=0.5,ci=None,zorder=2)
    sns.barplot(x='min', y=barplotdata['district'], data=barplotdata,
                label="Total", color="white",zorder=3)

    #   one hollow circle per district at its mean price (rows holding NaN in 'avg' draw nothing)
    sns.scatterplot(size=5, x=barplotdata['avg'], y=barplotdata['district'], legend=False,
                  data=barplotdata, markers="o", color='white', edgecolor='black',alpha=0.5, zorder=5)

    print('Scatter plot created. \nAdjusting axes and adding annotations...')

    ax[1].set_xlabel('Price', fontsize=10, color='#555555', fontweight='bold')
    ax[1].set_ylabel('District', fontsize=10, color='#555555', fontweight='bold')
    #   same price range as the colorbar of the map
    ax[1].set_xlim([xmin, xmax])

    plt.xticks(fontsize=8, rotation=90)
    ax[1].xaxis.set_major_formatter(FormatStrFormatter('£%.0f'))
    ax[1].xaxis.grid(False)

    plt.yticks(fontsize=8)

    # ANNOTATIONS
    #   add a title...
    ax[0].set_title('Rental property pricing in London on %s\n' % str(datetime.now())[:10], fontsize=16,
                    fontweight='bold', color='#333333')

    # create an annotations for the logo description, and query details
    #   (all annotation positions below are given in figure fractions)
    ax[0].annotate('Powered by:', xy=(0.022, 0.975), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top',
                   fontsize=8, color='#555555')
    fig.figimage(plt.imread('data/rm_logo.png'), xo=50, yo=1520)

    ax[0].annotate('Search query details', xy=(0.022, 0.90), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555', fontweight='bold')
    ax[0].annotate('Price range:', xy=(0.022, 0.88), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('£%i - %i' % (inputdict['minprice'],inputdict['maxprice']), xy=(0.17, 0.88), xycoords='figure fraction',
                   horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('Search radius:', xy=(0.022, 0.86), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(str(inputdict['radius']), xy=(0.17, 0.86), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('Property type:', xy=(0.022, 0.84), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(inputdict['propertytype'], xy=(0.17, 0.84), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('No. of bedrooms:', xy=(0.022, 0.82), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(str(inputdict['min_number_bedrooms'])+' - '+str(inputdict['max_number_bedrooms']), xy=(0.17, 0.82), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    #   the three optional query fields are only printed when they are non-empty
    if len(inputdict['furnishTypes'])>0:
        ax[0].annotate('Furnished:', xy=(0.022, 0.80), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top', fontsize=8, color='#555555')
        ax[0].annotate(str(inputdict['furnishTypes']), xy=(0.17, 0.80), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top', fontsize=8, color='#555555')
    if len(inputdict['letType'])>0:
        ax[0].annotate('Let type:', xy=(0.022, 0.78), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top', fontsize=8, color='#555555')
        ax[0].annotate(str(inputdict['letType']), xy=(0.17, 0.78), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top', fontsize=8, color='#555555')
    if len(inputdict['includeLetAgreed'])>0:
        ax[0].annotate('Incl. let agreed:', xy=(0.022, 0.76), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top', fontsize=8, color='#555555')
        ax[0].annotate(str(inputdict['includeLetAgreed']), xy=(0.17, 0.76), xycoords='figure fraction', horizontalalignment='left',
                       verticalalignment='top', fontsize=8, color='#555555')


    ax[0].annotate('Results summary', xy=(0.022, 0.72), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555', fontweight='bold')
    ax[0].annotate('No. of reported properties:', xy=(0.022, 0.70), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(no_of_total_results, xy=(0.17, 0.70), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('No. of initial results:', xy=(0.022, 0.68), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(len(rm.index), xy=(0.17, 0.68), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('No. of final results:', xy=(0.022, 0.66), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    #   the percentage is the final result count relative to no_of_total_results
    ax[0].annotate(str(len(curated_data.index))+' ({0:.0f}%)'.format(100*len(curated_data.index)/no_of_total_results), xy=(0.17, 0.66), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('No. of wards:', xy=(0.022, 0.64), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(len(avg_ward_prices.index), xy=(0.17, 0.64), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('No. of districts:', xy=(0.022, 0.62), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate(len(avg_district_prices.index), xy=(0.17, 0.62), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('Median price:', xy=(0.022, 0.60), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')
    ax[0].annotate('£'+str(int(np.median(curated_data['price']))), xy=(0.17, 0.60), xycoords='figure fraction',
                   horizontalalignment='left', verticalalignment='top', fontsize=8, color='#555555')

    ax[0].annotate('Top results', xy=(0.18, 0.37), xycoords='figure fraction', horizontalalignment='left',
                   verticalalignment='top', fontsize=10, color='#555555', fontweight='bold')
    plt.subplots_adjust(left=0.17, bottom=0.09, right=0.95, top=0.9)

    #   output files' formats
    #   file name: date, price range, bedroom range and property type joined by
    #   underscores (two underscores precede the property type)
    plotdirs = {'pdf': 'output/' + str(datetime.now())[:10] + '_' + str(inputdict['minprice']) + '_' + str(inputdict['maxprice']) +'_'+ str(inputdict['min_number_bedrooms']) + '_' + str(inputdict['max_number_bedrooms']) + '_'  + '_' + str(inputdict['propertytype']) + '.pdf',
                'jpeg': 'output/' + str(datetime.now())[:10] + '_' + str(inputdict['minprice']) + '_' + str(inputdict['maxprice']) +'_'+ str(inputdict['min_number_bedrooms']) + '_' + str(inputdict['max_number_bedrooms']) + '_'  + '_' + str(inputdict['propertytype']) + '.jpeg'}

    print('Saving figures to the following directories:')
    print('\t'+plotdirs['jpeg'])
    plt.savefig(plotdirs['jpeg'], dpi=200, bbox_inches='tight',edgecolor='black')
    print('\t'+plotdirs['pdf'])
    plt.savefig(plotdirs['pdf'], dpi=200, bbox_inches='tight', edgecolor='black')

    # timer measures the time from beginning to the plot saving
    #   (the start time is not set in this function; only timer2 is written here)
    global timer2
    timer2 = datetime.now()
Esempio n. 3
0
def pairplot(df, **kwargs):
    """Draw an n-by-n grid of pairwise plots for the columns of ``df``.

    For every (row, col) cell of the grid:

    * diagonal -- bar chart for a categorical column, histogram plus density
      curve for any other column (stacked per hue level when ``hue`` is set);
    * upper triangle -- correlation ellipse annotated with the coefficient;
    * lower triangle -- bubble cross-tabulation (both columns categorical),
      violin plot (exactly one categorical) or scatter plot (neither).

    A column counts as categorical when its dtype name is ``'category'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; every column gets one row and one column of the grid.
    **kwargs
        Optional settings, looked up by name:

        hue : str
            Column of ``df`` used to colour / split the plots.
        figsize : tuple
            Figure size; defaults to ``figaspect(1) * 0.5 * n_columns``.
        bar_kws, dist_kws, corr_kws, violin_kws, scatter_kws, crosstab_kws : dict
            Extra keyword arguments for the corresponding plot type.
            ``crosstab_kws`` may itself hold ``scatter_kws`` and ``text_kws``.

    Returns
    -------
    numpy.ndarray
        The 2-D array of matplotlib axes that was created.
    """
    import numpy as np
    # pandas is needed by the diagonal plots as well as by crosstabplot, so it
    # is imported once here instead of only inside the helper.
    import pandas as pd
    import matplotlib.pyplot as plt
    from matplotlib.figure import figaspect
    import seaborn as sns

    def corrplot(x,
                 y,
                 data,
                 ax,
                 cmap='coolwarm',
                 correlation='spearman',
                 **kwargs):
        """Draw a correlation ellipse for columns ``x`` and ``y`` on ``ax``."""
        from matplotlib.patches import Ellipse

        data_x = data[x]
        data_y = data[y]
        is_x_category = data_x.dtype.name == 'category'
        is_y_category = data_y.dtype.name == 'category'
        # Categorical columns are correlated through their integer codes.
        if is_x_category:
            data_x = data_x.cat.codes
        if is_y_category:
            data_y = data_y.cat.codes
        # Spearman is only used when both columns are categorical and Pearson
        # was not explicitly requested.
        if correlation == 'pearson' or not is_x_category or not is_y_category:
            method = 'pearson'
        else:
            method = 'spearman'
        r = data_x.corr(data_y, method=method)
        if isinstance(cmap, str):
            cmap = plt.get_cmap(cmap)
        # Rescale r from [-1, 1] to [0, 1] for the colormap lookup.
        color = cmap((r + 1) / 2)
        ax.axis('off')
        ax.add_artist(
            Ellipse((0.5, 0.5),
                    width=np.sqrt(1 + r),
                    height=np.sqrt(1 - r),
                    angle=45,
                    color=color))
        ax.text(0.5,
                0.5,
                '{:.2f}'.format(r),
                size='x-large',
                horizontalalignment='center',
                verticalalignment='center')

    def crosstabplot(x, y, data, ax, **kwargs):
        """Draw the x/y contingency table as count-labelled bubbles on ``ax``."""
        cross = pd.crosstab(data[x], data[y]).values
        # Bubble area is proportional to the count; the largest cell gets 500.
        size = cross / cross.max() * 500
        crosstab_kws = kwargs.get('crosstab_kws', {})
        scatter_kws = dict(color=sns.color_palette()[0], alpha=0.3)
        scatter_kws.update(crosstab_kws.get('scatter_kws', {}))
        text_kws = dict(size='x-large')
        text_kws.update(crosstab_kws.get('text_kws', {}))
        for (xx, yy), count in np.ndenumerate(cross):
            ax.scatter(xx, yy, s=size[xx, yy], **scatter_kws)
            ax.text(xx,
                    yy,
                    count,
                    horizontalalignment='center',
                    verticalalignment='center',
                    **text_kws)

    def show_off_legend(ax):
        """Hide the legend of ``ax`` if it has one."""
        legend = ax.get_legend()
        if legend:
            legend.set(visible=False)

    n_variables = df.columns.size
    hue = kwargs.get('hue')
    if 'figsize' in kwargs:
        figsize = kwargs['figsize']
    else:
        figsize = figaspect(1) * 0.5 * n_variables
    # squeeze=False keeps ``axes`` two-dimensional even for a single column,
    # so the [row, col] indexing below always works.
    _, axes = plt.subplots(n_variables,
                           n_variables,
                           figsize=figsize,
                           squeeze=False)
    plt.subplots_adjust(hspace=0.1, wspace=0.1)

    # Link the x axes within each column (from the diagonal downwards) and the
    # y axes within each row of the lower triangle.
    # NOTE(review): modifying the grouper returned by get_shared_x_axes() is
    # believed to be deprecated in recent matplotlib - confirm against the
    # target version before upgrading.
    # NOTE(review): ``:i - 1`` leaves out column i - 1 of the lower triangle;
    # this may be an off-by-one - left unchanged, please verify.
    for i in range(n_variables):
        axes[i, i].get_shared_x_axes().join(*axes[i:n_variables, i])
        if i > 1:
            axes[i, 0].get_shared_y_axes().join(*axes[i, :i - 1])

    for (row, col), ax in np.ndenumerate(axes):
        x = df.columns[col]
        y = df.columns[row]
        x_data = df[x]
        y_data = df[y]
        is_x_category = x_data.dtype.name == 'category'
        is_y_category = y_data.dtype.name == 'category'
        if is_x_category:
            x_categories = x_data.cat.categories
        if is_y_category:
            y_categories = y_data.cat.categories

        if row == col:  # diagonal
            hue_data = df[hue] if hue else None
            if is_x_category:
                bar_kws = dict(alpha=0.4, orientation='vertical')
                bar_kws.update(kwargs.get('bar_kws', {}))
                if hue:
                    # Stacked bars: one layer per hue level.
                    cross = pd.crosstab(x_data, hue_data)
                    cross.index = cross.index.categories
                    if hue_data.dtype.name == 'category':
                        cross.columns = cross.columns.categories
                    else:
                        cross.columns = hue_data.unique()
                    cross.reset_index(inplace=True)
                    melt = pd.melt(cross, id_vars='index', var_name='hue')
                    hue_values = melt['hue'].unique()
                    colors = sns.color_palette(n_colors=len(hue_values))
                    for level, (hue_value, color) in enumerate(
                            zip(hue_values, colors)):
                        subset = melt[melt['hue'] == hue_value]
                        if level == 0:
                            bottom = 0
                        else:
                            # Height of the layers already drawn.  Only the
                            # 'value' column is summed; summing the whole
                            # frame would drag the hue labels along.
                            below = melt.loc[melt['hue'].isin(
                                hue_values[:level])]
                            bottom = below.groupby('index')['value'].sum(
                            ).loc[subset['index']].values.ravel()
                        ax.bar(subset['index'],
                               subset['value'],
                               bottom=bottom,
                               color=color,
                               **bar_kws)
                else:
                    cross = pd.crosstab(x_data, []).values.ravel()
                    # x / y by keyword: recent seaborn releases no longer
                    # accept the data vectors positionally.
                    sns.barplot(x=x_data.cat.categories,
                                y=cross,
                                color=sns.color_palette()[0],
                                ci=None,
                                ax=ax,
                                **bar_kws)
            else:
                dist_kws = kwargs.get('dist_kws', {})
                if hue:
                    colors = sns.color_palette(n_colors=hue_data.unique().size)
                    hist_kws = dict(color=colors, alpha=0.4)
                    hist_kws.update(dist_kws.get('hist_kws', {}))
                    if hue_data.dtype.name == 'category':
                        hue_values = df[hue].cat.categories
                    else:
                        hue_values = df[hue].unique()
                    ax.hist([df.loc[df[hue] == v, x] for v in hue_values],
                            density=True,
                            histtype='barstacked',
                            **hist_kws)
                    # One density curve per hue level on top of the histogram.
                    for c, v in zip(colors, hue_values):
                        sns.distplot(df.loc[df[hue] == v, x],
                                     hist=False,
                                     color=c,
                                     ax=ax,
                                     **dist_kws)
                else:
                    sns.distplot(x_data, ax=ax, **dist_kws)
        elif row < col:  # upper
            corr_kws = kwargs.get('corr_kws', {})
            corrplot(x, y, data=df, ax=ax, **corr_kws)
        else:  # lower
            if is_x_category and is_y_category:
                # Forward the user's crosstab settings; the helper reads them
                # from its keyword arguments.
                crosstabplot(x,
                             y,
                             data=df,
                             ax=ax,
                             crosstab_kws=kwargs.get('crosstab_kws', {}))
            else:
                if is_x_category or is_y_category:
                    violin_kws = kwargs.get('violin_kws', {})
                    orient = 'v' if is_x_category else 'h'
                    sns.violinplot(x=x,
                                   y=y,
                                   hue=hue,
                                   data=df,
                                   orient=orient,
                                   ax=ax,
                                   **violin_kws)
                    show_off_legend(ax)
                else:
                    scatter_kws = kwargs.get('scatter_kws', {})
                    sns.scatterplot(x=x,
                                    y=y,
                                    hue=hue,
                                    data=df,
                                    ax=ax,
                                    **scatter_kws)
                    show_off_legend(ax)

        # Only the bottom row keeps x labels, only the first column y labels.
        if row < n_variables - 1:
            ax.set(xlabel='')
            for ticklabel in ax.get_xticklabels():
                ticklabel.set(visible=False)
        else:
            ax.set(xlabel=x)
            if is_x_category:
                ax.set(xticks=np.arange(x_categories.size),
                       xticklabels=x_data.cat.categories)
        if col > 0:
            ax.set(ylabel='')
            for ticklabel in ax.get_yticklabels():
                ticklabel.set(visible=False)
        else:
            ax.set(ylabel=y)
            if row > 0 and is_y_category:
                ax.set(yticks=np.arange(y_categories.size),
                       yticklabels=y_data.cat.categories)
    return axes
Esempio n. 4
0
                                            verbose=True)
methylation_results_df.sort_values(by='p_value').head(n=10)

# In[7]:

# -log10 of the corrected p-value is the y coordinate of the volcano plot
for results_df in (expression_results_df, methylation_results_df):
    results_df['nlog10_p'] = -np.log10(results_df.corr_pval)

sns.set({'figure.figsize': (20, 8)})
fig, axarr = plt.subplots(1, 2)

expression_ax = axarr[0]
# height of the significance threshold on the -log10 scale
sig_threshold = -np.log10(SIG_ALPHA)

# plot cancer type prediction from expression, in a volcano-like plot
sns.scatterplot(
    data=expression_results_df,
    x='delta_mean',
    y='nlog10_p',
    hue='reject_null',
    hue_order=[False, True],
    ax=expression_ax,
)
# vertical reference line at 0
expression_ax.axvline(x=0, linestyle=':', color='black')
# horizontal line at the statistical significance threshold
l = expression_ax.axhline(y=sig_threshold, linestyle=':')
# label the horizontal line with the threshold, in the line's own colour
# (matplotlib makes this fairly difficult, sadly)
expression_ax.text(
    0.9,
    sig_threshold + 0.3,
    r'$\alpha = {}$'.format(SIG_ALPHA),
    va='center',
    ha='center',
    color=l.get_color(),
    backgroundcolor=expression_ax.get_facecolor(),
)
Esempio n. 5
0
# the predicted values versus the true values.

# %%
# Gather the true and the predicted targets side by side in one frame.
predicted_actual = pd.DataFrame({
    "True values (k$)": target_test,
    "Predicted values (k$)": target_predicted,
})

# %%
import matplotlib.pyplot as plt
import seaborn as sns

# Predicted against true values, with the identity line for reference.
scatter_options = dict(
    x="True values (k$)",
    y="Predicted values (k$)",
    color="black",
    alpha=0.5,
)
sns.scatterplot(data=predicted_actual, **scatter_options)
plt.axline(xy1=(0, 0), slope=1, label="Perfect fit")
plt.axis('square')
_ = plt.title("Regression using a model without \ntarget transformation")

# %% [markdown]
# On this plot, correct predictions would lie on the diagonal line. This plot
# allows us to detect if the model makes errors in a consistent way, i.e.
# has some bias.
#
# On this plot, we see that for the large True price values, our model tends to
# under-estimate the price of the house. Typically, this issue arises when the
# target to predict does not follow a normal distribution. In this case the
# model would benefit from target transformation.
Esempio n. 6
0
#feature names
print(data["feature_names"])

# Append the target as one extra column next to the input features.
column_names = np.append(data["feature_names"], ["target"])
dataframe = pd.DataFrame(np.c_[data["data"], data["target"]],
                         columns=column_names)

# Visualization: pairwise overview of a few "mean" features ...
pairplot_features = [
    "mean radius", "mean area", "mean smoothness", "mean texture",
    "mean perimeter", "mean compactness", "mean symmetry"
]
sns.pairplot(dataframe, hue="target", vars=pairplot_features)

# ... followed by "mean radius" against three of them, coloured by target.
for y_feature in ("mean compactness", "mean smoothness", "mean symmetry"):
    sns.scatterplot(x="mean radius",
                    y=y_feature,
                    data=dataframe,
                    hue="target")

# Kendall rank correlation between all columns.
sns.heatmap(dataframe.corr("kendall"))

# Separate the inputs (first 30 columns) from the output for training.
X = dataframe.iloc[:, :30]
            check = True
            kf.transitionMatrix = np.array([[1, 0, DELTA_T, 0],
                                            [0, 1, 0, DELTA_T], [0, 0, 1, 0],
                                            [0, 0, 0, -1 * BOUNCE_COEFF]])

        kf.predict(const_mat)

        if check:
            kf.transitionMatrix = np.array([[1, 0, DELTA_T, 0],
                                            [0, 1, 0, DELTA_T], [0, 0, 1, 0],
                                            [0, 0, 0, 1]])

    cv2.imshow('frame', fullgray)
    cv2.waitKey(50)

    cv2.imshow('frame2', res2)
    cv2.waitKey(50)

cap.release()
cv2.destroyAllWindows()

# Scatter plot of the collected points.  x and y are passed by keyword because
# recent seaborn releases no longer accept them positionally.
# NOTE(review): assumes every entry of `graph` is an (x, y) pair - confirm.
if graph:
    graph_x, graph_y = zip(*graph)
    sns.scatterplot(x=list(graph_x), y=list(graph_y))
plt.show()

# Overlay all snapshots into a single image with a bitwise OR.
base = snapshots[0]
for img in snapshots:
    base = cv2.bitwise_or(base, img)

plt.imshow(base)
plt.show()
def graf_scatter(x, y):
    """Draw a seaborn scatter plot of ``y`` against ``x``.

    The seaborn "darkgrid" theme is applied first; note that ``set_theme``
    changes the plotting defaults globally, not just for this plot.

    Parameters
    ----------
    x, y : vector-like
        Coordinates of the points; passed straight to ``sns.scatterplot``.

    Returns
    -------
    None
    """
    sns.set_theme(style="darkgrid")
    # No `palette` here: without a `hue` variable it has no effect and recent
    # seaborn releases emit a warning about it.
    sns.scatterplot(x=x, y=y)
    return
Esempio n. 9
0
from keras.layers import Dense, LSTM, Activation, Input
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

df = pd.read_csv(
    r"C:\Users\nisha_000\Documents\GitHub\WAIxASUA\MCI_2014_to_2018.csv")

# Keep only the "Theft Over" rows and columns 0, 1 and 4 (presumably X, Y and
# occurrencedate - the names used below).  The explicit copy makes the column
# assignment that follows act on an independent frame, not on a slice of `df`.
test = df.loc[df["MCI"] == "Theft Over", df.columns[[0, 1, 4]]].copy()
test["occurrencedate"] = pd.to_datetime(test["occurrencedate"])
# Order the events chronologically, then drop the date so only X / Y remain.
test = test.sort_values('occurrencedate').drop('occurrencedate',
                                               axis='columns')

# x and y are passed by keyword: recent seaborn releases no longer accept
# them positionally.
sns.scatterplot(x=test["X"], y=test["Y"])

dataset = test.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
sns.scatterplot(x=dataset[:, 0], y=dataset[:, 1])

# test model 1: each row is the input for predicting the row that follows it
X = dataset[:-1]
Y = dataset[1:]
Y_og = Y
nrow = len(X)

# reshape into X=t and Y=t+1
Esempio n. 10
0
# # improvement - plot sample name next to (or overlapping) points
def label_point(x, y, val, ax):
    """Write each point's label just to the right of it on ``ax``.

    ``x``, ``y`` and ``val`` are pandas Series; they are combined column-wise
    (aligned on their index) and every resulting row produces one small text
    label placed 0.02 data units to the right of the point.
    """
    points = pd.concat([x, y, val], axis=1, keys=['x', 'y', 'val'])
    for _, row in points.iterrows():
        label = str(row['val'])
        ax.text(row['x'] + .02, row['y'], label, fontsize='small')


with PdfPages('multipage_pdf.pdf') as pdf:
    sns.set_style("whitegrid")
    # Page 1 is the plain PCA scatter; page 2 is the same scatter with the
    # sample ids written next to the points.
    for with_labels in (False, True):
        plt.figure(figsize=(7, 7))
        ax = sns.scatterplot(data=pca_tot, x="pc1", y="pc2", hue="celltype")
        ax.set_xlabel('Principal Component 1', fontsize=15)
        ax.set_ylabel('Principal Component 2', fontsize=15)
        ax.set_title('Preliminary PCA for PCA generator, all genes',
                     fontsize=20)
        if with_labels:
            label_point(pca_tot.pc1, pca_tot.pc2, pca_tot.id, plt.gca())
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()
    # scatter 3
Esempio n. 11
0
def circumplex_scatter(
    data,
    ax=None,
    title="Soundscape Scatter Plot",
    hue=None,
    x="ISOPleasant",
    y="ISOEventful",
    prim_labels=True,
    diagonal_lines=False,
    figsize=(5, 5),
    palette=None,
    legend=False,
    legend_loc="lower left",
    s=10,
    **scatter_kwargs,
):
    """Plot ISOcoordinates as scatter points on a soundscape circumplex grid

    Makes use of seaborn.scatterplot

    Parameters
    ----------
    data : pd.DataFrame
        table holding the ``x`` and ``y`` columns (and ``hue``, when it is a
        column name)
    ax : plt.Axes, optional
        existing matplotlib axes, by default None (a new figure of size
        ``figsize`` is created)
    title : str, optional
        plot title, by default "Soundscape Scatter Plot"
    hue : vector or key in data, optional
        Grouping variable that will produce points with different colors. Can be either categorical or numeric, although color mapping will behave differently in latter case, by default None
    x : str, optional
        column name for x variable, by default "ISOPleasant"
    y : str, optional
        column name for y variable, by default "ISOEventful"
    prim_labels : bool, optional
        whether to include ISOPleasant and ISOeventful labels, by default True
    diagonal_lines : bool, optional
        whether to include diagonal dimension labels (e.g. calm, etc.), by default False
    figsize : tuple, optional
        size of the figure created when ``ax`` is None, by default (5, 5)
    palette : string, list, dict or matplotlib.colors.Colormap, optional
        Method for choosing the colors to use when mapping the hue semantic. String values are passed to seaborn.color_palette(). List or dict values imply categorical mapping, while a colormap object implies numeric mapping.
        by default None, in which case a "husl" palette with one color per
        unique value of ``data[hue]`` is generated when ``hue`` is given
    legend : bool, optional
        whether to include legend with the hue values, by default False
    legend_loc : str, optional
        relative location of legend, by default "lower left"
    s : int, optional
        size of scatter points, by default 10
    **scatter_kwargs
        additional keyword arguments forwarded to seaborn.scatterplot

    Returns
    -------
    plt.Axes
        the axes returned by seaborn.scatterplot
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=figsize)

    # Only build a default palette when there is a hue to map. seaborn
    # ignores (and warns about) a palette passed without a hue variable, so
    # generating len(data) colours in that case was pure overhead.
    if palette is None and hue is not None:
        n_colors = len(data[hue].unique())
        palette = sns.color_palette("husl", n_colors, as_cmap=False)

    # Named scatter_ax (not ``s``) so the marker-size argument is not shadowed.
    scatter_ax = sns.scatterplot(
        data=data,
        x=x,
        y=y,
        hue=hue,
        s=s,
        ax=ax,
        legend=legend,
        palette=palette,
        zorder=data_zorder,
        **scatter_kwargs,
    )
    ax = _deal_w_default_labels(ax, prim_labels)
    _circumplex_grid(ax, prim_labels, diagonal_lines)
    _set_circum_title(ax, prim_labels, title)
    if legend:
        _move_legend(ax, legend_loc)
    return scatter_ax
Esempio n. 12
0
def circumplex_density(
    data,
    ax=None,
    title="Soundscape Density Plot",
    x="ISOPleasant",
    y="ISOEventful",
    prim_labels=True,
    diagonal_lines=False,
    incl_scatter=False,
    incl_outline=False,
    figsize=(5, 5),
    palette="Blues",
    scatter_color="black",
    outline_color="black",
    fill_color="blue",
    fill=True,
    hue=None,
    common_norm=False,
    bw_adjust=default_bw_adjust,
    alpha=0.95,
    legend=False,
    legend_loc="lower left",
    s=10,
    scatter_kwargs=None,
    **density_kwargs,
):
    """Create a bivariate distribution plot of ISOCoordinates

    This method works by overlaying a sns.kdeplot() of the ISOCoordinate data and then drawing the circumplex grid on the same axes. If a scatter is also included, a sns.scatterplot() with the given options is drawn before the density plot. If an outline is included, an unfilled, fully opaque sns.kdeplot() is drawn before the main density.

    If using a hue grouping, it is recommended to only plot the 50th percentile contour so as to not create a cluttered figure. This can be done with the options thresh = 0.5, levels = 2.

    Parameters
    ----------
    data : pd.DataFrame
        table holding the ``x`` and ``y`` columns (and ``hue``, when it is a
        column name)
    ax : plt.Axes, optional
        existing subplot axes, by default None (a new figure of size
        ``figsize`` is created)
    title : str, optional
        by default "Soundscape Density Plot"
    x : str, optional
        column name for x variable, by default "ISOPleasant"
    y : str, optional
        column name for y variable, by default "ISOEventful"
    prim_labels : bool, optional
        whether to include ISOPleasant and ISOEventful axis labels, by default True
    diagonal_lines : bool, optional
        whether to include diagonal dimension axis labels (i.e. calm, etc.), by default False
    incl_scatter : bool, optional
        plot coordinate scatter underneath density plot, by default False
    incl_outline : bool, optional
        include a thicker outline around the density plot, by default False
    figsize : tuple, optional
        size of the figure created when ``ax`` is None, by default (5, 5)
    palette : str, optional
        Method for choosing the colors to use when mapping the hue semantic. String values are passed to seaborn.color_palette(). List or dict values imply categorical mapping, while a colormap object implies numeric mapping.
        by default "Blues"
    scatter_color : str, optional
        define a color for the scatter points. Does not work with a hue grouping variable, by default "black"
    outline_color : str, optional
        define a color for the add'l density outline, by default "black"
    fill_color : str, optional
        define a color for the density fill, does not work with a hue grouping variable, by default "blue"
    fill : bool, optional
        whether to fill the density plot, by default True
    hue : vector or key in data, optional
        Grouping variable that will produce points with different colors. Can be either categorical or numeric, although color mapping will behave differently in latter case, by default None
    common_norm : bool, optional
        forwarded to sns.kdeplot() as ``common_norm``, by default False
    bw_adjust : optional
        forwarded to sns.kdeplot() as ``bw_adjust``, by default default_bw_adjust
    alpha : float, optional
        opacity of the main density plot (the optional outline is always drawn with alpha=1), by default 0.95
    legend : bool, optional
        whether to include the hue labels legend, by default False
    legend_loc : str, optional
        relative location of the legend, by default "lower left"
    s : int, optional
        size of the scatter points, by default 10
    scatter_kwargs : dict, optional
        additional arguments for sns.scatterplot(), by default None (no
        additional arguments)
    **density_kwargs
        additional keyword arguments forwarded to both sns.kdeplot() calls

    Returns
    -------
    plt.Axes
        the axes returned by the main sns.kdeplot() call
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=figsize)
    # None instead of a shared mutable {} default; an explicit dict still works.
    if scatter_kwargs is None:
        scatter_kwargs = {}

    if incl_scatter:
        sns.scatterplot(
            data=data,
            x=x,
            y=y,
            hue=hue,
            s=s,
            ax=ax,
            legend=legend,
            color=scatter_color,
            palette=palette,
            zorder=data_zorder,
            **scatter_kwargs,
        )

    if incl_outline:
        # Unfilled, fully opaque contours in outline_color.
        sns.kdeplot(
            data=data,
            x=x,
            y=y,
            fill=False,
            ax=ax,
            alpha=1,
            color=outline_color,
            palette=palette,
            hue=hue,
            common_norm=common_norm,
            legend=legend,
            zorder=data_zorder,
            bw_adjust=bw_adjust,
            **density_kwargs,
        )

    # Main density layer; its return value is what the caller gets back.
    d = sns.kdeplot(
        data=data,
        x=x,
        y=y,
        fill=fill,
        ax=ax,
        alpha=alpha,
        palette=palette,
        color=fill_color,
        hue=hue,
        common_norm=common_norm,
        legend=legend,
        zorder=data_zorder,
        bw_adjust=bw_adjust,
        **density_kwargs,
    )

    _circumplex_grid(ax, prim_labels, diagonal_lines)
    _set_circum_title(ax, prim_labels, title)
    _deal_w_default_labels(ax, prim_labels)
    if legend:
        _move_legend(ax, legend_loc)
    return d
Esempio n. 13
0
"""Plot accuracy vs model size for our visual wake word trained models."""
import ast
from pathlib import Path

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

accuracy = Path.cwd() / 'accuracy.log'
sizes = Path.cwd() / "sizes.log"

# Each log is read as a single Python dict literal keyed by model name.
# ast.literal_eval parses literals only, so (unlike eval) a tampered or
# corrupted log file cannot execute arbitrary code.
acc_dict = ast.literal_eval(accuracy.read_text())
size_dict = ast.literal_eval(sizes.read_text())

# One (model, accuracy, size) row per model in the accuracy log; a model
# missing from the size log raises KeyError.
data = [(model_name, model_accuracy, size_dict[model_name])
        for model_name, model_accuracy in acc_dict.items()]

# Passing the column names to the constructor also works for an empty log,
# where assigning df.columns afterwards would fail on a length mismatch.
df = pd.DataFrame(data, columns=['model', 'accuracy', 'size'])
print(df)

sns.scatterplot(y='accuracy', x='size', data=df)
plt.title("Visual Wake Words Model Test")
plt.xlabel("Bytes")
plt.ylabel("Dev set accuracy")
plt.show()
Esempio n. 14
0
# I'd make their values all 0, but that would skew the model to cluster pitchers into what they DON'T pitch
# Imputing means shouldn't affect the model, and doesn't pretend to know what a pitcher's unknown pitches would be like.
df3b = df3.fillna(df3.mean())

######   Clustering Analysis   ######
from sklearn.cluster import KMeans
import seaborn as sns

# Fit one model per candidate cluster count and record its inertia.
n_clusters = [3, 5, 8, 10, 15, 20, 25, 30, 40, 50]
error = []
for clusters in n_clusters:
    print(clusters, 'clusters...')
    kmeans = KMeans(n_clusters=clusters, random_state=523).fit(df3b)
    error.append(kmeans.inertia_)

# x/y must be passed by keyword: seaborn >= 0.12 rejects positional vectors.
sns.scatterplot(x=n_clusters, y=error)

# Should really be looking for elbow in the above scatterplot, but hard to see one...going with 15.
# Maybe use the average silhouette method, or gap statistic method, to find a better number. TBD.

kmeans = KMeans(n_clusters=15, random_state=523).fit(df3b)
# labels_ already holds the assignments from the fit above; calling
# fit_predict here would refit the identical model a second time.
idx = kmeans.labels_
pitcher_years = df3b.index.tolist()
final = pd.DataFrame({'pitcher_year': pitcher_years, 'cluster': idx})
# The index label is split by position: everything except the last 7
# characters is taken as the pitcher, the last 4 characters as the year.
final['pitcher'] = final['pitcher_year'].apply(lambda x: x[:len(x) - 7])
final['year'] = final['pitcher_year'].apply(lambda x: x[-4:])

### NEXT STEP: ATTACH FIPS FOR EACH PLAYER_YEAR, THEN LOOK AT TRENDS PER CLUSTER
# ALSO NEED TO INVESTIGATE WHY DATA ONLY UP TO 2017!

lahman_pitching = pd.read_csv('lahman/core/pitching.csv')
Esempio n. 15
0
def ratioConc(ax,
              respDF,
              cell1,
              cell2,
              time,
              mutAffDF,
              pseudo=0.1,
              legend=False):
    """Plots Ratio of cell 1 to cell 2 over a range of concentrations

    Three axes are drawn:

    * ``ax[0]``: the dose-response ratio curves, drawn by
      ``hillRatioDosePlot``.
    * ``ax[1]``: the maximum ratio of each ligand/valency against the
      "IL2Rα $K_{D}$ (nM)" affinity, with one straight-line fit (in log10
      affinity) per valency.
    * ``ax[2]``: the dose at which that maximum occurs against the same
      affinity, with one log-log straight-line fit per valency.

    Parameters
    ----------
    ax : sequence of matplotlib Axes
        at least three axes; indices 0, 1 and 2 are used
    respDF : passed through to ``hillRatioDosePlot``
    cell1, cell2 : str
        cell names; used for titles, labels and the derived
        ``cell2 + " Max"`` / ``cell2 + " Dose"`` column names
    time : passed through to ``hillRatioDosePlot``
    mutAffDF : pd.DataFrame
        merged onto the per-ligand fit table on their common columns; must
        supply the "IL2Rα $K_{D}$ (nM)" column
    pseudo : float, optional
        passed through to ``hillRatioDosePlot``, by default 0.1
    legend : bool, optional
        if True, ``ax[0]`` gets a legend built from its last three handles

    Returns
    -------
    None
    """
    affCol = "IL2Rα $K_{D}$ (nM)"
    maxCol = cell2 + " Max"
    doseCol = cell2 + " Dose"
    ratioTitle = "Ratio of " + cell1 + " to " + cell2

    hillDF = hillRatioDosePlot(ax[0],
                               respDF,
                               time,
                               cell1,
                               cell2,
                               pseudo=pseudo)

    # Collect one single-row frame per ligand/valency and concatenate once,
    # instead of re-concatenating onto a growing (initially empty) frame.
    fitRows = []
    for ligand in hillDF.Ligand.unique():
        ligData = hillDF.loc[hillDF.Ligand == ligand]
        for valency in ligData.Valency.unique():
            isoData = ligData.loc[ligData.Valency == valency]
            maxRatio = isoData.Ratio.max()
            fitRows.append(
                pd.DataFrame({
                    "Ligand": [ligand],
                    "Valency": valency,
                    maxCol: maxRatio,
                    # Keep only the first dose reaching the maximum: several
                    # tied doses would give this column a different length
                    # from the others and make the constructor raise.
                    doseCol:
                    isoData.loc[isoData.Ratio == maxRatio].Dose.values[:1],
                }))
    fitDF = pd.concat(fitRows) if fitRows else pd.DataFrame()

    ax[0].set(title=ratioTitle)

    if legend:
        handles, labels = ax[0].get_legend_handles_labels()
        ax[0].legend(handles[-3:], labels[-3:])

    fitDF = fitDF.merge(mutAffDF)

    # Fit lines are evaluated on log10(affinity) in [-1, 1], i.e. 0.1-10 nM,
    # matching the x-limits set on ax[1] and ax[2] below.
    affs = np.linspace(-1, 1, 100)
    maxLines = []
    doseLines = []
    for valency in fitDF.Valency.unique():
        valData = fitDF.loc[fitDF.Valency == valency]
        logAff = np.log10(valData[affCol].values)
        mMax, bMax = np.polyfit(logAff, valData[maxCol], 1)
        mDose, bDose = np.polyfit(logAff, np.log10(valData[doseCol]), 1)
        maxLines.append(
            pd.DataFrame({
                "Valency": valency,
                affCol: np.power(10, affs),
                maxCol: mMax * affs + bMax
            }))
        doseLines.append(
            pd.DataFrame({
                "Valency": valency,
                affCol: np.power(10, affs),
                doseCol: np.power(10, mDose * affs + bDose)
            }))

    maxLineDF = (pd.concat(maxLines) if maxLines else pd.DataFrame()).reset_index()
    doseLineDF = (pd.concat(doseLines) if doseLines else pd.DataFrame()).reset_index()

    sns.scatterplot(data=fitDF,
                    x=affCol,
                    y=maxCol,
                    hue="Ligand",
                    style="Valency",
                    ax=ax[1],
                    palette=ligDict,
                    legend=False)
    sns.lineplot(data=maxLineDF,
                 x=affCol,
                 y=maxCol,
                 style="Valency",
                 ax=ax[1],
                 color="k",
                 linewidth=1.,
                 legend=False)
    ax[1].set(xscale="log",
              title=ratioTitle,
              xlim=(1e-1, 1e1),
              ylim=(0, None),
              ylabel=cell1 + " to " + cell2 + " Ratio Max Magnitude")

    sns.scatterplot(data=fitDF,
                    x=affCol,
                    y=doseCol,
                    hue="Ligand",
                    style="Valency",
                    ax=ax[2],
                    palette=ligDict,
                    legend=False)
    sns.lineplot(data=doseLineDF,
                 x=affCol,
                 y=doseCol,
                 style="Valency",
                 ax=ax[2],
                 color="k",
                 linewidth=1.,
                 legend=False)
    ax[2].set(xscale="log",
              yscale="log",
              title=ratioTitle,
              xlim=(1e-1, 1e1),
              ylim=(1e-2, 1e2),
              ylabel=cell1 + " to " + cell2 + " Ratio Max Dose")
Esempio n. 16
0
def calibrateExperiment(folderName, secondPath, concUnit, concUnitPrefix,
                        numberOfCalibrationSamples, initialStandardVolume):
    """Fit Hill calibration curves for every cytokine of every calibration kit.

    Calibration files are discovered with the glob
    'inputData/bulkCSVFiles/Calibration*' and loaded through
    ``cleanUpFlowjoCSV``. For each cytokine column a 4-parameter Hill curve
    is fitted to log10(MFI) against the serial-dilution concentrations, and
    lower/upper limits of detection are derived from the fit.

    Side effects: prints progress information, saves the figure
    'plots/calibrationCurves-<folderName>-<concUnitPrefix>.png' and pickles
    the fitting parameters and the limits of detection to
    'misc/fittingParameters-<folderName>-<concUnitPrefix>.pkl' and
    'misc/LODParameters-<folderName>-<concUnitPrefix>.pkl'.

    Parameters
    ----------
    folderName : str
        passed to ``cleanUpFlowjoCSV`` and used in the output file names
    secondPath :
        not used by this function
    concUnit : number
        scale factor; concentrations (g/L) are multiplied by
        ``concUnit / molecular weight`` when every cytokine has an entry in
        ``completeCytokineMWDict``, otherwise they are left in g/L
    concUnitPrefix : str
        unit label used in the x-axis title and the output file names
    numberOfCalibrationSamples : int
        number of standards in the serial dilution
    initialStandardVolume : number
        volume the standard was first diluted in (mL, going by the inline
        unit comments below)

    Returns
    -------
    None
    """
    #Get cytokine calibration curve data
    tempExperimentParameters = {'overallPlateDimensions': [8, 12]}
    calibrationFileNames = glob.glob('inputData/bulkCSVFiles/Calibration*')
    print(calibrationFileNames)
    kitNames = []
    for calibrationFileName in calibrationFileNames:
        newName = calibrationFileName.split('.')[0].split('_')[0].split(
            '/')[-1]
        kitNames.append(newName)
    print(kitNames)
    sortedData, sortedFiles = cleanUpFlowjoCSV(kitNames, folderName, 'cyt',
                                               tempExperimentParameters)
    #Kit names of the form "<prefix>-<name>" are shortened to "<name>" once the files have been loaded
    for i, newName in enumerate(kitNames):
        if '-' in newName:
            newName2 = newName.split('-')[1]
        else:
            newName2 = newName
        kitNames[i] = newName2
    rsquaredList = []
    concLODList = []
    fittingParametersList = []
    cbaStandardsMFIList = []
    cbaPlotPointsMFIList = []
    cbaStandardsConcentrationList = []
    cbaPlotPointsConcentrationList = []

    numberOfPlotPoints = 101
    xaxistitle = 'Concentration of Cytokine Standards Standards (' + concUnitPrefix + ')'
    yaxistitle = 'GeoMFI'

    #Unit conversion is all-or-nothing: it is only applied if every cytokine of every kit has a molecular weight
    allCytokinesHaveMWInDict = True
    for calibration in sortedData:
        cytokines = parseCytokineCSVHeaders(calibration.columns)
        for cytokine in cytokines:
            if cytokine[0] not in completeCytokineMWDict:
                allCytokinesHaveMWInDict = False
                print(cytokine[0])
    print('wat')
    print(allCytokinesHaveMWInDict)

    for calibration in sortedData:
        #First and last columns are not cytokine measurements and are dropped
        data = np.array(calibration.values[:, 1:-1], dtype=float)
        cytokines = parseCytokineCSVHeaders(calibration.columns)
        fittingParameters = np.zeros((data.shape[1], 4))
        concLOD = np.zeros((data.shape[1], 4))
        #Initial concentration all cytokine standards is given by CBA kit manual as 5000 pGg/mL: when standards are diluted in 2mL
        conc = 5000  #pg/mL
        serialDilutionFactor = 2  #1:serialDilutionFactor dilution between each standard well
        #Smaller initial dilution (0.5mL instead of 2mL for example) increase the initial concentration of the first calibration sample
        initialConc = (conc * 1e-12) / (
            (initialStandardVolume * 1e-3) / 2
        )  #g/L (pg/mL * 1e-12 g/pg)/(1e-3 L/mL)
        #Calibration samples are always diluted by a factor of serialdilutionFactor (so with 12 calibration samples, the last sample is (serialDilutionFactor^-11) the concentration of the first, which is pure standard (2^0)
        cbaStandardsConcentrations = np.flipud(initialConc * np.power(
            serialDilutionFactor,
            np.linspace(-numberOfCalibrationSamples + 1, 0,
                        numberOfCalibrationSamples)))
        #More x values along the above concentration bounds are sampled to use to construct calibration curve. Plot points are extended slightly at high range to allow visualization of upper LOD not accessible with experimental dilution
        cbaStandardsConcentrationsPlotPoints = np.flipud(
            initialConc * np.power(
                2,
                np.linspace(-numberOfCalibrationSamples + 1, 4,
                            numberOfPlotPoints)))

        cbaStandardsConcentrationMatrix = np.zeros(
            [len(cytokines), cbaStandardsConcentrations.shape[0]])
        cbaStandardsConcentrationPlotPointsMatrix = np.zeros(
            [len(cytokines), cbaStandardsConcentrationsPlotPoints.shape[0]])
        cbaStandardsMFIMatrix = np.zeros(
            [len(cytokines), cbaStandardsConcentrations.shape[0]])
        cbaStandardsMFIPlotPointsMatrix = np.zeros(
            [len(cytokines), cbaStandardsConcentrationsPlotPoints.shape[0]])
        print(cytokines)
        print(data)
        for i, cytokineList in enumerate(cytokines):
            cytokine = cytokineList[0]
            #amplitude bounded from range/2 to range*2, EC50 bounded from minimum to maximum standard concentration tested, Hill coefficient bounded from 0 to 2, Background bounded from 0 to minimum GFI*2
            lowerCurveFitBounds = [
                (np.max(data[:, i]) - np.min(data[:, i])) / 2,
                np.min(cbaStandardsConcentrations), 0, 0
            ]
            upperCurveFitBounds = [
                (np.max(data[:, i]) - np.min(data[:, i])) * 2,
                np.max(cbaStandardsConcentrations), 2,
                np.min(data[:, i]) * 2
            ]
            #use scipy curve fit to determine best hill equation fit for data, searching within the bounds given above
            popt, pcov = curve_fit(Hill,
                                   cbaStandardsConcentrations,
                                   np.log10(data[:, i]),
                                   sigma=np.log10(data[:, i]),
                                   bounds=(lowerCurveFitBounds,
                                           upperCurveFitBounds))
            rsquared = round(
                r_squared(cbaStandardsConcentrations, np.log10(data[:, i]),
                          Hill, popt), 3)
            print(rsquared)
            rsquaredList.append(rsquared)
            for j in range(len(popt)):
                #Convert just ec50 value to desired units (nM,uM etc) if cytokine has a molar mass in dict
                if j == 1 and allCytokinesHaveMWInDict:
                    fittingParameters[i, j] = np.multiply(
                        popt[j], (concUnit / completeCytokineMWDict[cytokine]))
                #other values in 4 parameter logistic equation are tied to intensity y-value, which doesn't change, or are the hill coefficient, which is completely separate, so parameters are kept the same
                else:
                    fittingParameters[i, j] = popt[j]

            #Convert x values of experimental data points and curve fit points to desired units (nM,uM,etc.)
            if allCytokinesHaveMWInDict:
                cbaStandardsConcentrationMatrix[i, :] = np.multiply(
                    cbaStandardsConcentrations,
                    (concUnit / completeCytokineMWDict[cytokine]))
                cbaStandardsConcentrationPlotPointsMatrix[i, :] = np.multiply(
                    cbaStandardsConcentrationsPlotPoints,
                    (concUnit / completeCytokineMWDict[cytokine]))
            else:
                cbaStandardsConcentrationMatrix[
                    i, :] = cbaStandardsConcentrations
                cbaStandardsConcentrationPlotPointsMatrix[
                    i, :] = cbaStandardsConcentrationsPlotPoints
            cbaStandardsMFIMatrix[i, :] = data[:, i]
            print(fittingParameters[i, :])
            #The fit was done on log10(MFI), so the curve is raised back to the power of 10 for plotting
            cbaStandardsMFIPlotPointsMatrix[i, :] = np.power(
                10,
                Hill(cbaStandardsConcentrationPlotPointsMatrix[i, :],
                     *fittingParameters[i, :]))

            #Get LOD for each cytokine calibration curve (aka the linear range of the calibration curve)
            backgroundGFI = fittingParameters[i, 3]
            amplitudeGFI = fittingParameters[i, 0]

            #Approximate LOD by determining concentration values at LOD% and 1-LOD% (3% and 97%) of curve. Must be used on log10(curve), as calibration curve is plotted in logscale
            LODpercent = 0.03
            #LOD% more than background GFI used for lower LOD GFI
            lowerGFILOD = math.log10(10**((1 + LODpercent) *
                                          math.log10(backgroundGFI)))
            #LOD% less than maximum GFI (Background + amplitude) used for upper LOD GFI
            upperGFILOD = math.log10(
                10**((1 - LODpercent) *
                     math.log10(amplitudeGFI + backgroundGFI)))
            #Log10(upper/lowerGFILOD) converted back into normal GFI by 10 to its power, then fed into inverse hill equation with current cytokine fitting parameters to obtain corresponding concentration values
            lowerConcLOD = InverseHill(lowerGFILOD, fittingParameters[i, :])
            upperConcLOD = InverseHill(upperGFILOD, fittingParameters[i, :])
            #Row of GFI/conc LODs for this cytokine: [lower GFI, upper GFI, lower conc, upper conc]
            concLOD[i, :] = np.array(
                [10**lowerGFILOD, 10**upperGFILOD, lowerConcLOD, upperConcLOD])
        #Rearrange each matrix so that the resulting frames have one column per cytokine
        flattenedMatrix = cbaStandardsMFIMatrix.flatten()
        reshapedMatrix = np.reshape(
            flattenedMatrix, (numberOfCalibrationSamples, len(cytokines)),
            order='F')
        flattenedMatrix2 = cbaStandardsMFIPlotPointsMatrix.flatten()
        reshapedMatrix2 = np.reshape(flattenedMatrix2,
                                     (numberOfPlotPoints, len(cytokines)),
                                     order='F')
        flattenedMatrix3 = cbaStandardsConcentrationMatrix.flatten()
        reshapedMatrix3 = np.reshape(
            flattenedMatrix3, (numberOfCalibrationSamples, len(cytokines)),
            order='F')
        flattenedMatrix4 = cbaStandardsConcentrationPlotPointsMatrix.flatten()
        reshapedMatrix4 = np.reshape(flattenedMatrix4,
                                     (numberOfPlotPoints, len(cytokines)),
                                     order='F')
        realCytokineList = []
        for cytokine in cytokines:
            realCytokineList.append(cytokine[0])
        dataValsList = []
        plotPointsList = []
        for j in range(1, numberOfCalibrationSamples + 1):
            dataValsList.append([j])
        for j in range(1, numberOfPlotPoints + 1):
            plotPointsList.append([j])
        dataValsIndex = pd.MultiIndex.from_tuples(dataValsList,
                                                  names=['Standard'])
        plotPointsIndex = pd.MultiIndex.from_tuples(plotPointsList,
                                                    names=['Standard'])
        currentCBAStandardsMFIDf = pd.DataFrame(reshapedMatrix,
                                                index=dataValsIndex,
                                                columns=realCytokineList)
        currentCBAPlotPointsMFIDf = pd.DataFrame(reshapedMatrix2,
                                                 index=plotPointsIndex,
                                                 columns=realCytokineList)
        currentCBAStandardsConcentrationDf = pd.DataFrame(
            reshapedMatrix3, index=dataValsIndex, columns=realCytokineList)
        currentCBAPlotPointsConcentrationDf = pd.DataFrame(
            reshapedMatrix4, index=plotPointsIndex, columns=realCytokineList)

        currentCBAStandardsMFIDf.columns.name = 'Cytokine'
        currentCBAPlotPointsMFIDf.columns.name = 'Cytokine'
        currentCBAStandardsConcentrationDf.columns.name = 'Cytokine'
        currentCBAPlotPointsConcentrationDf.columns.name = 'Cytokine'
        mic1 = pd.MultiIndex.from_tuples(cytokines, names=['Cytokine'])
        print(cytokines)
        print(mic1)
        fittingParametersDf = pd.DataFrame(
            fittingParameters,
            index=mic1,
            columns=['Amplitude', 'EC50', 'HillCoeff', 'Background'])
        mic2 = pd.MultiIndex.from_tuples([['MFI', 'Lower'], ['MFI', 'Upper'],
                                          ['Concentration', 'Lower'],
                                          ['Concentration', 'Upper']])
        concLODDf = pd.DataFrame(concLOD, index=mic1, columns=mic2)

        concLODList.append(concLODDf)
        fittingParametersList.append(fittingParametersDf)
        cbaStandardsMFIList.append(currentCBAStandardsMFIDf)
        cbaPlotPointsMFIList.append(currentCBAPlotPointsMFIDf)
        cbaStandardsConcentrationList.append(
            currentCBAStandardsConcentrationDf)
        cbaPlotPointsConcentrationList.append(
            currentCBAPlotPointsConcentrationDf)

    #fullFittingParametersDf = pd.concat(fittingParametersList,keys=kitNames,names=['Kit Name'])
    #fullConcLODDf = pd.concat(concLODList,keys=kitNames,names=['Kit Name'])
    fullFittingParametersDf = pd.concat(fittingParametersList)
    fullConcLODDf = pd.concat(concLODList)

    print(fullFittingParametersDf)
    print(fullConcLODDf)

    fullCBAStandardsMFIDf = pd.concat(cbaStandardsMFIList,
                                      keys=kitNames,
                                      names=['Kit Name'],
                                      axis=1)
    fullCBAPlotPointsMFIDf = pd.concat(cbaPlotPointsMFIList,
                                       keys=kitNames,
                                       names=['Kit Name'],
                                       axis=1)
    fullCBAStandardsConcentrationDf = pd.concat(cbaStandardsConcentrationList,
                                                keys=kitNames,
                                                names=['Kit Name'],
                                                axis=1)
    fullCBAPlotPointsConcentrationDf = pd.concat(
        cbaPlotPointsConcentrationList,
        keys=kitNames,
        names=['Kit Name'],
        axis=1)
    #Long format: one row per (Standard, Cytokine, Kit Name) with an MFI and a concentration column
    fullCBAStandardsList = [
        fullCBAStandardsMFIDf.stack().stack(),
        fullCBAStandardsConcentrationDf.stack().stack()
    ]
    fullCBAPlotPointsList = [
        fullCBAPlotPointsMFIDf.stack().stack(),
        fullCBAPlotPointsConcentrationDf.stack().stack()
    ]
    fullCBAStandardsDf = pd.concat(fullCBAStandardsList,
                                   axis=1,
                                   keys=[yaxistitle, xaxistitle])
    fullCBAPlotPointsDf = pd.concat(fullCBAPlotPointsList,
                                    axis=1,
                                    keys=[yaxistitle, xaxistitle])

    plottingPointsDf = fullCBAPlotPointsDf.reset_index()
    plottingStandardsDf = fullCBAStandardsDf.reset_index()

    numCyt = len(pd.unique(plottingPointsDf['Cytokine']))
    if numCyt <= 10:
        fullpalette = sns.color_palette(sns.color_palette(), numCyt)
    else:
        fullpalette = sns.color_palette('hls', numCyt)
    g = sns.relplot(data=plottingPointsDf,
                    x=xaxistitle,
                    y=yaxistitle,
                    hue='Cytokine',
                    col='Kit Name',
                    kind='line',
                    col_order=pd.unique(plottingPointsDf['Kit Name']),
                    hue_order=pd.unique(plottingPointsDf['Cytokine']),
                    height=10,
                    palette=fullpalette)
    #Plot vertical lines at lower and upper concentration limits of detection
    colorDict = {}
    for j, cytokine in enumerate(pd.unique(plottingPointsDf['Cytokine'])):
        colorDict[cytokine] = fullpalette[j]
    for axis, kitName in zip(g.axes.flat,
                             pd.unique(plottingPointsDf['Kit Name'])):
        #Select this kit's standards with a mask built from the standards frame itself.
        #The plot-points frame has a different number of rows, so a mask taken from it would pick the wrong rows.
        kitStandardsDf = plottingStandardsDf[plottingStandardsDf['Kit Name'] ==
                                             kitName]
        currentpalette = []
        for cytokine in pd.unique(kitStandardsDf['Cytokine']):
            currentColor = colorDict[cytokine]
            currentpalette.append(currentColor)
            cytokineLODValues = fullConcLODDf.loc[cytokine, :]['Concentration']
            axis.axvline(x=cytokineLODValues['Lower'].values,
                         color=currentColor,
                         linestyle=':')
            axis.axvline(x=cytokineLODValues['Upper'].values,
                         color=currentColor,
                         linestyle=':')
        #Overlay the measured standards on the fitted curves, using the same colour per cytokine
        sns.scatterplot(data=kitStandardsDf,
                        x=xaxistitle,
                        y=yaxistitle,
                        hue='Cytokine',
                        ax=axis,
                        legend=False,
                        palette=currentpalette)
        axis.set_xscale('log')
        axis.set_yscale('log')
    plt.savefig('plots/calibrationCurves-' + folderName + '-' +
                concUnitPrefix + '.png')
    #Save fitting parameters and LOD for curve fit for each cytokine
    with open(
            'misc/fittingParameters-' + folderName + '-' + concUnitPrefix +
            '.pkl', "wb") as f:
        pickle.dump(fullFittingParametersDf, f)
    with open(
            'misc/LODParameters-' + folderName + '-' + concUnitPrefix + '.pkl',
            "wb") as f:
        pickle.dump(fullConcLODDf, f)
Esempio n. 17
0
# NOTE(review): this fragment starts mid-script; `g2`, `fig`, `labels` and
# `dt_train` are defined before the visible code.
g2.set_xticklabels(labels, rotation=0)
# Third panel of the 2x2 grid: number of rows per 'Transmission' value.
fig.add_subplot(2, 2, 3)
g3 = sns.countplot(x='Transmission', data=dt_train)
loc, labels = plt.xticks()
g3.set_xticklabels(labels, rotation=0)
# Fourth panel: number of rows per 'Owner_Type' value.
fig.add_subplot(2, 2, 4)
g4 = sns.countplot(x='Owner_Type', data=dt_train)
loc, labels = plt.xticks()
g4.set_xticklabels(labels, rotation=0)
plt.show()

# New figure: 'Price' scattered against three numeric columns. Only three of
# the four grid positions are filled in the visible code.
fig = plt.figure(figsize=(15, 15))
fig.subplots_adjust(hspace=0.2, wspace=0.2)
ax1 = fig.add_subplot(2, 2, 1)
# x-limits are set on the current axes (ax1) before the scatter is drawn.
plt.xlim([0, 100000])
p1 = sns.scatterplot(x="Kilometers_Driven", y="Price", data=dt_train)
loc, labels = plt.xticks()
ax1.set_xlabel('Kilometer')

ax2 = fig.add_subplot(2, 2, 2)
#plt.xlim([0, 100000])
p2 = sns.scatterplot(x="Mileage_upd", y="Price", data=dt_train)
loc, labels = plt.xticks()
ax2.set_xlabel('Mileage')

ax3 = fig.add_subplot(2, 2, 3)
#plt.xlim([0, 100000])
p3 = sns.scatterplot(x="Engine_upd", y="Price", data=dt_train)
loc, labels = plt.xticks()
ax3.set_xlabel('Engine')
# NOTE(review): `axes` is indexed as a 2-D grid here but is only created
# further down as a 1x2 row; the grid (and `dataset`) presumably comes from
# code before this fragment - confirm.
# Bottom-right panel: axis labels, y axis moved to the right-hand side.
axes[1, 1].set_xlabel('Price', fontsize=14)
axes[1, 1].set_ylabel('HP', fontsize=14)
axes[1, 1].yaxis.set_label_position("right")
axes[1, 1].yaxis.tick_right()
axes[1, 1].set(ylim=(40, 160))

plt.show()

# One row, two panels: distribution of 'KM' (left) and 'KM' against 'Price'
# (right).
f, axes = plt.subplots(1, 2, figsize=(14, 4))

sns.distplot(dataset['KM'], ax=axes[0])
axes[0].set_xlabel('KM', fontsize=14)
axes[0].set_ylabel('Count', fontsize=14)
axes[0].yaxis.tick_left()

sns.scatterplot(x='Price', y='KM', data=dataset, ax=axes[1])
axes[1].set_xlabel('Price', fontsize=14)
axes[1].set_ylabel('KM', fontsize=14)
# Put the right panel's y axis on the right so it does not sit between panels.
axes[1].yaxis.set_label_position("right")
axes[1].yaxis.tick_right()

plt.show()

# Count of rows per fuel type; `labels` and `sizes` are not used again in the
# visible code.
fuel_list = Counter(dataset['FuelType'])
labels = fuel_list.keys()
sizes = fuel_list.values()

f, axes = plt.subplots(1, 2, figsize=(14, 4))

sns.countplot(dataset['FuelType'], ax=axes[0], palette="Set1")
axes[0].set_xlabel('Fuel Type', fontsize=14)
# NOTE(review): `p` is not defined in the visible code before this line; it
# presumably refers to a plot created earlier in the script - confirm.
p.set_xticklabels(p.get_xticklabels(), rotation=90)
plt.tight_layout()
plt.show()

# In[27]:

#No. of hours spent on each Offence by district
# Sum the HOUR column per (DISTRICT, OFFENSE_CODE_GROUP) pair and sort the
# pairs from largest to smallest total.
df = data.groupby(["DISTRICT", "OFFENSE_CODE_GROUP"
                   ])["HOUR"].sum().reset_index().sort_values("HOUR",
                                                              ascending=False)
# Bare expression: a notebook display leftover, it has no effect in a script.
df
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
# One point per (district, offence group): summed hours on x, offence group
# on y, coloured by district.
p = sns.scatterplot(x="HOUR",
                    y="OFFENSE_CODE_GROUP",
                    hue="DISTRICT",
                    data=df,
                    palette="summer")
# NOTE(review): the y axis shows OFFENSE_CODE_GROUP categories, yet it is
# labelled as a crime count - confirm the intended label.
p.set_ylabel("No. of Crimes Occurred")
p.set_xlabel("Hours")
plt.tight_layout()
plt.show()

# In[41]:

#Year wise percentage rate
# Sorted list of the distinct years, and the number of rows for each of them
# in that same order, drawn as a pie chart with percentage labels.
yrlbl = data['YEAR'].astype('category').cat.categories.tolist()
yrwisecount = data['YEAR'].value_counts()
sizes = [yrwisecount[year] for year in yrlbl]
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=yrlbl, autopct='%1.1f%%', shadow=True)
Esempio n. 20
0
    def plot_classification(self, figsize=(16, 9)) -> Dict:
        """
        Plot predicted probabilities and gross losses for every target.

        One figure is created per target: each top-level column of
        ``self.df`` when the frame has three column levels, otherwise the
        whole frame under the key ``None``. The upper panel shows the first
        prediction column as a line together with a horizontal line at
        ``self.probability_cutoff``. The lower panel shows the first gross
        loss column (clipped at zero from above) as a scatter plot whose
        point size is the negated loss and whose colour encodes the
        classification outcome:

        * 0 (white)      - label not above the cutoff
        * 1 (pale green) - prediction and label both above the cutoff
        * 2 (cerise)     - label above the cutoff but prediction at or below it

        Parameters
        ----------
        figsize : tuple
            figure size handed to ``plt.figure``

        Returns
        -------
        Dict
            the created figures keyed by target; the current figure is closed
            in pyplot with ``plt.close()`` before it is stored
        """
        import seaborn as sns
        import matplotlib.pyplot as plt
        from matplotlib import gridspec
        from pandas.plotting import register_matplotlib_converters

        # get rid of deprecation warning
        register_matplotlib_converters()

        pc = self.probability_cutoff
        plots = {}

        if self.df.columns.nlevels == 3:
            targets = unique_top_level_columns(self.df)
        else:
            targets = [None]

        for target in targets:
            # get target and frame
            df = self.df[target] if target is not None else self.df
            prediction = df[PREDICTION_COLUMN_NAME].iloc[:, 0]
            label = df[LABEL_COLUMN_NAME].iloc[:, 0]
            gross_loss = df[GROSS_LOSS_COLUMN_NAME].iloc[:, 0]
            positions = range(len(df))

            # define grid
            fig = plt.figure(figsize=figsize)
            gs = gridspec.GridSpec(2, 1, height_ratios=[1, 3])
            ax0 = plt.subplot(gs[0])
            ax1 = plt.subplot(gs[1])

            # plot probability
            sns.lineplot(x=positions, y=prediction, ax=ax0)
            ax0.hlines(pc, 0, len(df), color=sns.xkcd_rgb['silver'])

            # plot loss
            # `&` binds tighter than `>`, so every comparison needs its own
            # parentheses; without them the mask is evaluated as
            # ((prediction > pc) & label) > pc, which breaks for float labels.
            label_positive = label > pc
            color = pd.Series(0, index=df.index)
            color.loc[(prediction > pc) & label_positive] = 1
            color.loc[(prediction <= pc) & label_positive] = 2

            colors = {
                0: sns.xkcd_rgb['white'],
                1: sns.xkcd_rgb['pale green'],
                2: sns.xkcd_rgb['cerise']
            }
            # only the classes that actually occur, in ascending order
            palette = [
                colors[color_index] for color_index in np.sort(color.unique())
            ]

            sns.scatterplot(ax=ax1,
                            x=positions,
                            y=gross_loss.clip(upper=0),
                            size=gross_loss * -1,
                            hue=color,
                            palette=palette)

            plt.close()
            plots[target] = fig

        return plots
def plot(x, y):
    '''
    Scatter-plot the points given as coordinate lists and save the figure.

    Parameters
    ----------
    x : sequence
        x coordinates of the points
    y : sequence
        y coordinates of the points, same length as ``x``

    The figure is written to ``plot.png`` in the current working directory.
    '''
    # x and y are keyword-only in seaborn >= 0.12 (the only positional
    # parameter is `data`), so they are passed by name.
    fig = sns.scatterplot(x=x, y=y).get_figure()
    fig.savefig('plot.png')
Esempio n. 22
0
# Boolean row mask: keep rows whose case-based infection percentage is not NaN.
# np.all(..., axis=0) ANDs the stacked conditions, so re-enabling the
# commented-out entries below would tighten the mask.
# NOTE(review): the name `filter` shadows the Python builtin.
filter = np.all(
    np.array([
        joined_df['infections_based_on_cases_as_percent_of_infectious'].notna(
        ).values,
        #joined_df['infections_based_on_cases_as_percent_of_infectious'] < 10,
        #joined_df['case_based_infectious_population'] > 100,
    ]),
    axis=0)
data = joined_df.reset_index()[filter]

# NOTE(review): the mask above is built from the *cases*-based column while
# the plots below use the *deaths*-based column - confirm this is intended.
# No new figure is created between the calls, so in a plain script all of the
# following plots are drawn on the same current axes.
sns.boxplot(data=data,
            x='maxtempC',
            y='infections_based_on_deaths_as_percent_of_infectious')

# Same variables as points, coloured by country/region.
sns.scatterplot(data=data,
                x='maxtempC',
                y='infections_based_on_deaths_as_percent_of_infectious',
                hue="Country/Region")

# Same variables again, coloured by the case-based infectious population.
sns.scatterplot(data=data,
                x='maxtempC',
                y='infections_based_on_deaths_as_percent_of_infectious',
                hue="case_based_infectious_population")

# Time series for France only: three columns indexed by date.
line_df = data[data['Country/Region'] == 'France'].set_index('date')[[
    'new_cases', 'infections_based_on_cases',
    'case_based_infectious_population'
]]

sns.lineplot(data=line_df)

# Second y axis sharing the x axis of the current axes.
ax2 = plt.twinx()
Esempio n. 23
0
    model = LinearRegression().fit(X.T, Y[i])
    #model = LinearRegression().fit(X[2].reshape(-1,1), Y[0])
    r_sq = model.score(X.T, Y[i])
    #r_sq = model.score(X[2].reshape(-1,1), Y[2])
    y_pred2 = model.predict(X2.T)
    #y_pred= model.predict(X[2].reshape(-1,1))
    intercept, coefficients = model.intercept_, model.coef_

    #---------------------------------------------------------------------------------------

    x_labels = [0.1, 0.5, 1, 2, 5, 10]
    y_labels = [0, 0.2, 0.4, 0.6, 0.8, 1.0]

    fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
    #fig.suptitle(string1.format(c0,c1,c2,indir_01,indir_02,indir_21,r,R))
    fig.suptitle("Linear regression for X")

    sns.scatterplot(ax=axes[0], x=y_pred2[:-2], y=Y2[i][:-2])
    axes[0].set_title("x= y_pred; y=Y_ideal")

    sns.scatterplot(ax=axes[1], x=Y2[i][:-2], y=Y2[i][:-2])
    axes[1].set_title("x=Y_ideal; y=Y_ideal")

    #sns.plot(ax=axes[2], x=y_pred2[:-2]-Y2[2][:-2],y=np.arange(10000))
    #axes[2].set_title("x=delta t; error")

    plt.plot(y_pred2[:-2] - Y2[i][:-2], 'k')
    plt.xlabel('time_steps')
    plt.ylabel('error')
    plt.title('Linear regression')
Esempio n. 24
0
)
print(imdb['color'].value_counts(normalize=True))  # Normalizar os valores
# print(imdb['director_name'].value_counts().tail(20))    # Lista os Diretores e quantos filmes fizeram e depois Pega as ultimas linhas
a = 1

# Pegar a coluna das cores      #### REfazer esta parte
color_or_bw = imdb.query(
    "color in ['Color', ' Black and White']"
)  # Criar nova tabela para estudar a importância/relação de filmes coloridos/PeB (remove os demais)
color_or_bw = color_or_bw.dropna().query(
    'budget > 0  |  gross > 0'
)  # Remove as linhas sem dados (dropna()) e as linhas com dados
color_or_bw['color_0_ou_1'] = (
    color_or_bw['color'] == 'Color'
) * 1  # Tentativa de Criar uma coluna que torna a variável Preto/Branco em binária
sns.scatterplot(data=color_or_bw, x="color_0_ou_1", y="gross")
plot.show()
print('\n\nDataframe organizado por filmes coloridos Ou preto e branco')
print(color_or_bw["color_0_ou_1"].value_counts())
# color_or_bw['color_0_ou_1'] = color_or_bw['color'] == 'Color'     # Outra forma de fazer, mas não etá aceitando
# df["b"] = df["value"] == 3                                    # Exemplo
# print(f'\n\n Tamanho da variável: {len(color_or_bw)}')

### Verificar gasto e ganho apenas com os filmes dos USA, devido a imprecissão de conversão monetária
imdb2 = imdb.drop_duplicates()
print(
    f'\nHaviam {len(imdb)} colunas e após remover os duplicado, passamos a ter {len(imdb2)} colunas.\nOu seja, haviam {len(imdb)-len(imdb2)} filmes duplicados'
)
imdb_usa = imdb2.query('country == "USA"')
budget_gross = imdb_usa[[
    'budget', 'gross'
Esempio n. 25
0

# Put the feature columns and the target side by side in one frame.
x_merged_to_y = pd.concat([X_data_visualize, y], axis=1)
x_merged_to_y.shape  # shown only when run as a notebook cell


# In[12]:


print(x_merged_to_y)


# In[13]:


# Transaction amount against the class label, coloured by class.
ax = sns.scatterplot(data=x_merged_to_y, x="Amount", y="Class", hue="Class")


# In[15]:


# 2x4 grid: one panel for each of the columns V1..V8 against the class label.
fig = plt.figure(figsize=(15, 8))
fig.subplots_adjust(wspace=0.8, hspace=0.6)
for i in range(1, 9):
    plt.subplot(2, 4, i)
    sns.scatterplot(data=x_merged_to_y,
                    x="V" + str(i),
                    y="Class",
                    hue="Class")


# In[16]:

Esempio n. 26
0
# Medals won by each team.
print(data.groupby(['Team', 'Medal']).Medal.agg('count'))
# Medals won by each sex.
print(data.groupby(['Sex', 'Medal']).Sex.agg('count'))
# All sports together with their participation counts.
print(
    'The different types of sport for the athletes and their participation\n',
    data[data.Sport.fillna('None') != 'None'].Sport.value_counts())
# Number of distinct female and male participant names.
print('Total number of women participants',
      len(data[data.Sex == 'F'].Name.unique()))
print('Total number of men participants',
      len(data[data.Sex == 'M'].Name.unique()))
# Per-year entry counts, split by sex.
f_year_count = data[data.Sex == 'F'].groupby('Year').agg('count').Name
m_year_count = data[data.Sex == 'M'].groupby('Year').agg('count').Name
plot = (sns.scatterplot(data=m_year_count), sns.scatterplot(data=f_year_count))
plt.show()  # Data visualisation plot of male and female participants

# Min / max / mean weight and height per sport, separately for men and women.
# The columns are selected with a list: indexing a groupby with a bare tuple
# of names was deprecated and raises in pandas 2.x.
summary_columns = ['Weight', 'Height']
summary_stats = ['min', 'max', 'mean']
male_data = data[data.Sex == 'M']
print(male_data.groupby(['Sport'])[summary_columns].agg(summary_stats))
female_data = data[data.Sex == 'F']
print(female_data.groupby(['Sport'])[summary_columns].agg(summary_stats))

# First year each sport appears in the male data, sorted by that year.
# Series.sort_values takes keyword-only arguments in pandas 2.x; the former
# positional 'index' was the axis and is the default, so it is dropped.
sport_min_year = male_data.groupby('Sport').Year.agg(
    ['min', 'max'])['min'].sort_values()
# How many sports share each first year.
year_count = Counter(sport_min_year)
year = list(year_count.keys())
Esempio n. 27
0
def plot_residuals(actual, prediction):
    """
    Scatter the residuals against the actual values.

    Parameters
    ----------
    actual : array-like
        observed values, used as the x coordinates
    prediction : array-like
        predicted values; the residual plotted on y is ``prediction - actual``
    """
    sns.scatterplot(x=actual, y=prediction - actual)
    plt.title("residuals")
Esempio n. 28
0
    tsne = TSNE(n_components=2, perplexity=15, random_state=1000)
    data_tsne = tsne.fit_transform(sdf)

    df_tsne = pd.DataFrame(data_tsne, columns=['x', 'y'], index=cdf.index)
    dff = pd.concat([cdf, df_tsne], axis=1)

    # Show the dataset
    sns.set()

    fig, ax = plt.subplots(figsize=(18, 11))

    with sns.plotting_context("notebook", font_scale=1.5):
        sns.scatterplot(x='x',
                        y='y',
                        size='Age',
                        sizes=(30, 400),
                        palette=sns.color_palette("husl", 2),
                        data=dff,
                        ax=ax)

    ax.set_xlabel(r'$x$', fontsize=14)
    ax.set_ylabel(r'$y$', fontsize=14)

    plt.show()

    # Perform the preliminary analysis
    n_clusters = []
    n_noise_points = []
    silhouette_scores = []
    calinski_harabaz_scores = []
        norm_count = counts[p]
    if p in shuffled_counts:
        shuf_count = shuffled_counts[p]
    diff_dict['predicate'].append(p)
    diff_dict['normal'].append(norm_count)
    diff_dict['difference'].append(norm_count - shuf_count)

# Seaborn >= 0.12 accepts only `data` positionally, so every x / y vector
# below is passed by keyword.

# Per-predicate difference between the normal and the shuffled count.
sns.barplot(x=diff_dict['predicate'], y=diff_dict['difference'])
plt.xticks(rotation=90)
plt.tight_layout()
plt.ylabel('Difference (Normal Count - Shuffled Count)')
plt.xlabel('Predicate')
plt.savefig('Figures/AA_TOF_norm_shuffled_change.png')
plt.show()

# The same difference plotted against the normal count.
sns.scatterplot(x=diff_dict['normal'], y=diff_dict['difference'])
plt.xticks(rotation=90)
plt.tight_layout()
plt.ylabel('Normal vs Difference')
plt.xlabel('Normal Count')
plt.savefig('Figures/AA_TOF_difference_v_norm_scatter.png')
plt.show()

# plot path lengths:
# the normal and shuffled path-length records are stacked into one frame
# and drawn as one box per 'type'
length_df = pd.concat(
    [pd.DataFrame(path_lengths),
     pd.DataFrame(shuffled_path_lengths)])
sns.boxplot(x=length_df['type'], y=length_df['length'])
plt.savefig('Figures/AA_TOF_shuffled_length_boxplots.png')
plt.show()
        #print(columnGene2)
        #print(len(columnCodonPosition2))
        #print(len(columnDepth2))
        #print(len(columnGene2))
        d = {
            "CodonPosition": columnCodonPosition2,
            'CodonDepth': columnDepth2,
            "Genetype": columnGene2
        }
        df = pd.DataFrame(data=d)
        #print(columnCodonPosition)
        #cmap = sns.cubehelix_palette(dark=color, light=.8, as_cmap=True)
        sns.scatterplot(x="CodonPosition",
                        y="CodonDepth",
                        hue="Genetype",
                        palette=customPalette,
                        s=30,
                        alpha=0.9,
                        data=df).set_title("strain " + listofSamples[x])
        #print("test")
        #plt.show()
        plt.savefig("strain " + "_" + listofSamples[x] + "_" + str(gene) +
                    '.png')
        plt.clf()
    #print(df)
    #df.to_csv("test2.csv")
    #break
#    cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
#   cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)

#d = {"CodonPosition": columnCodonPosition, 'CodonDepth':columnDepth, "Genetype":columnGene}
Esempio n. 31
0
"""
Scatterplot with continuous hues and sizes
==========================================

_thumb: .45, .45

"""

import seaborn as sns
sns.set()

# Fetch the example "planets" dataset
planets = sns.load_dataset("planets")

# Continuous colormap for the hue variable
cmap = sns.cubehelix_palette(as_cmap=True, rot=-.2)

# Colour encodes the year, marker size encodes the mass
ax = sns.scatterplot(
    data=planets,
    x="distance",
    y="orbital_period",
    hue="year",
    palette=cmap,
    size="mass",
    sizes=(10, 200),
)
# Load the training data; the bare expressions below only display their
# result when run interactively (notebook / REPL).
iowa_train = pd.read_csv('train.csv')
iowa_train.head()


iowa_train['SalePrice'].describe()
# Original author's observation from the summary above: all prices are
# greater than zero.

# Look at the shape of the SalePrice distribution
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
sns.distplot(iowa_train['SalePrice'])


# Skewness and kurtosis of SalePrice
print(iowa_train['SalePrice'].skew())
print(iowa_train['SalePrice'].kurt())



# Visualizing the numeric variables through scatter plots

# Extract the numeric columns from the data frame
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
num_iowatrain = iowa_train.select_dtypes(include = numerics)
num_iowatrain.head()

num_iowatrain.columns

# NOTE(review): no new figure is created between these calls, so in a plain
# script both scatter plots are drawn on the same axes.
Gr = sns.scatterplot(x = 'GrLivArea', y = 'SalePrice', data = num_iowatrain)
Lot = sns.scatterplot(x = 'LotArea', y = 'SalePrice', data = num_iowatrain)
Mas = sns.scatterplot(x = 'MasVnrArea', y = 'SalePrice', data = num_iowatrain
"""
Scatterplot with categorical and numerical semantics
====================================================

_thumb: .45, .5

"""
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")

# Fetch the example "diamonds" dataset
diamonds = sns.load_dataset("diamonds")

# Clarity grades in the order used for the hue legend
clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"]

# Square figure without the left and bottom spines
f, ax = plt.subplots(figsize=(6.5, 6.5))
sns.despine(f, left=True, bottom=True)

# Price against carat: colour encodes clarity, marker size encodes depth
sns.scatterplot(
    data=diamonds,
    x="carat",
    y="price",
    hue="clarity",
    hue_order=clarity_ranking,
    palette="ch:r=-.2,d=.3_r",
    size="depth",
    sizes=(1, 8),
    linewidth=0,
    ax=ax,
)
Esempio n. 34
0
def plot_data(data):
    """
    Scatter-plot the first two feature rows, coloured by the first label row.

    Parameters
    ----------
    data : sequence
        ``data[0]`` holds the features and ``data[1]`` the labels; the plot
        uses ``data[0][0]`` as x, ``data[0][1]`` as y and ``data[1][0]`` as hue
    """
    X = data[0]
    Y = data[1]
    # x and y are keyword-only in seaborn >= 0.12, so pass them by name.
    sns.scatterplot(x=X[0], y=X[1], hue=Y[0])
    plt.show()