Esempio n. 1
0
def cell_cycle_phase_barplot(adata, palette='Set2'):
    """Draw a horizontal bar chart of the cell counts per cell cycle phase.

    See also: cell_cycle_phase_pieplot for the matplotlib pie chart

    Parameters
    -----------
    adata: AnnData
        The AnnData object being used for the analysis. Its `obs` table must
        hold a `cell_cycle_phase` column, i.e. it must be previously
        evaluated by `tl.annotate_cell_cycle`.
    palette: str
        Name of the qualitative brewer palette handed to
        `scale_fill_brewer`.

    Returns
    -----------
    A plotnine barplot with the total counts of cells in each phase of the
    cell cycle.

    """
    phase_order = ['G1 post-mitotic', 'G1 pre-replication', 'S/G2/M']

    # Work on a copy so that `adata.obs` itself is left untouched.
    obs = adata.obs.copy()
    obs['cell_cycle_phase'] = pd.Categorical(
        obs['cell_cycle_phase'], categories=phase_order)

    # The bars are flipped to horizontal, so only the x grid lines are kept.
    grid_overrides = theme(
        panel_grid_major_y=element_blank(),
        panel_grid_minor_y=element_blank(),
        panel_grid_major_x=element_line(size=1.5),
        panel_grid_minor_x=element_line(size=1.5))

    bars = ggplot(obs, aes('cell_cycle_phase', fill='cell_cycle_phase'))
    bars = bars + geom_bar() + coord_flip() + guides(fill=False)
    bars = bars + labs(y='', x='Cell cycle phase')
    bars = bars + theme_light() + grid_overrides
    bars = bars + scale_fill_brewer(type='qual', palette=palette)
    return bars
Esempio n. 2
0
def plot_fees(fees, title, y_axis, years, filename):
    """Plot fees per year, one line per conference, and save the figure.

    `fees` must provide the columns 'year' and 'conference' plus the column
    named by `y_axis`. The y axis is fixed to the range 0-1000 and the x axis
    is ticked at `years`. The plot is written to `filename`
    (width=6, height=3, dpi=300); nothing is returned.
    """
    mapping = pn.aes('year', y_axis, color='conference', shape='conference')

    chart = (
        pn.ggplot(fees, mapping)
        + pn.geom_point()
        + pn.geom_line()
        + pn.labs(title=title, x='Year', y='Fee (€)')
        + pn.ylim(0, 1000)
        + pn.theme_light()
        + pn.scale_x_continuous(breaks=years)
        # Both legends share one title so the colour and shape keys line up.
        + pn.scale_colour_discrete(name='Conference')
        + pn.scale_shape_discrete(name='Conference')
    )

    chart.save(filename, width=6, height=3, dpi=300)
Esempio n. 3
0
    def plot_series(self, y: str,
                    series_state_df: pd.DataFrame) -> plotnine.ggplot:
        """Plot column `y` against `round_number`, one coloured line per game.

        `series_state_df` must contain the columns `round_number`, `game_id`
        and the column named by `y`. The x axis is ticked at the integers
        1 to 19.
        """
        # Treat the game id as a category so each game gets its own line.
        per_game = "factor(game_id)"
        mapping = plotnine.aes(x="round_number",
                               y=y,
                               color=per_game,
                               group=per_game)
        round_ticks = range(1, 20, 1)

        chart = plotnine.ggplot(series_state_df) + mapping
        chart = chart + plotnine.geom_point() + plotnine.geom_line()
        chart = chart + plotnine.theme_light()
        return chart + plotnine.scale_x_continuous(breaks=round_ticks)
Esempio n. 4
0
    def create_state_plot(
            self,
            x: str,
            y: str,
            data: pd.DataFrame,
            aes_kwargs: dict = None) -> plotnine.ggplot:
        """Build a line-and-point plot of column `y` against column `x`.

        Parameters
        ----------
        x, y
            Names of the columns of `data` mapped to the x and y axes.
        data
            Frame holding both columns. Every value of `data[x]` becomes an
            x-axis break.
        aes_kwargs
            Optional mapping of extra aesthetics (e.g. ``{"color": "col"}``)
            that is unpacked into `plotnine.aes`. ``None`` means no extras.

        Returns
        -------
        The assembled `plotnine.ggplot` object.
        """
        extra_aes = {} if aes_kwargs is None else aes_kwargs

        x_breaks = data[x]

        # `range` only accepts integers, so a float maximum is truncated
        # first; integer columns behave exactly as before.
        max_y = int(max(data[y]))
        # Step chosen so that the y axis gets at most about 20 breaks.
        y_interval = int(max_y / 20) + 1
        y_breaks = range(0, max_y + 1, y_interval)

        return (plotnine.ggplot(data) +
                plotnine.aes(x=x, y=y, **extra_aes) +
                plotnine.geom_line() + plotnine.geom_point() +
                plotnine.theme_light() +
                plotnine.scale_x_continuous(breaks=x_breaks) +
                plotnine.scale_y_continuous(breaks=y_breaks))
Esempio n. 5
0
    def test_theme_light(self):
        """Check the base plot with `theme_light` applied.

        The themed plot plus `_theme` must compare equal to 'theme_light'
        (presumably the name of a reference image -- confirm against the
        test harness).
        """
        themed = self.g + labs(title='Theme Light') + theme_light()

        assert (themed + _theme) == 'theme_light'
Esempio n. 6
0
def plot_2d_distribution_per_category(
        dataframe: pandas.DataFrame,
        label_column: str,
        coordinates: Tuple[str],
        colors: List[str],
        coloring_style: str = 'manual',
        log_10_scale: bool = False,
        theme: str = 'gray',
        alpha: float = 0.5,
        save_to_file: str = None,
        dpi: int = 150
) -> p9.ggplot:
    """
    The :func:`plot_2d_distribution_per_category` helps with providing the user with a 2-dimensional plot of the
    whole distribution.

    Parameters
    ----------
    dataframe: `pandas.DataFrame`, required
        This is the main parameter that this method is supposed to work with, which is a dataframe with a label column
        (which determines the color of each point) and coordinates for x and y axes.
    label_column: `str`, required
        The input dataframe must have a label_column (preferably integer starting from 0), the name of that
        column should be input here.
    coordinates: `Tuple[str]`, required
        This is a tuple of column names, the first one being the column in which the `x` values for our 2d plot
        are stored, and the other one corresponds to the `y` axis.
    colors: `List[str]`, required
        For the `manual` coloring style this is a list with one color per distinct label; for the `gradient`
        style it is a list of two colors, the first for the low end and the second for the high end.
    coloring_style: `str`, optional (default='manual')
        Either `manual` or `gradient` which helps assigning colors to clusters.
    log_10_scale: `bool`, optional (default=False)
        If True, the x axis is drawn on a base-10 logarithmic scale (the y axis is left linear).
    theme: `str`, optional (default='gray')
        This is the `theme` types, the accepted values are: ``['gray', 'dark', 'seaborn', 'light']``, the values
        are consistent with `plotnine` package's format.
    alpha: `float`, optional (default=0.5)
        The transparency intensity can be determined by setting this parameter.
    save_to_file: `str`, optional (default=None)
        If the user intends to save the plot in a file, this parameter should have a value. The value must be a
        filepath. When it is None the plot is drawn instead.
    dpi: `int`, optional (default=150)
        The dpi for saving the plots indicating the image quality.

    Returns
    ----------
    The output of this method is of `p9.ggplot` type.

    Raises
    ----------
    AssertionError
        If `coloring_style` is unknown or `colors` has the wrong length for the chosen style.
    Exception
        If `theme` is not one of the supported theme names.
    """
    assert coloring_style in ['manual', 'gradient'], "invalid coloring style"

    x_column, y_column = coordinates[0], coordinates[1]

    if coloring_style == 'gradient':
        assert len(colors) == 2, "you have chosen gradient style coloring, for colors you have to provide a list with the \
            First element being the color for low and the second the color for high."
        pplot = p9.ggplot(data=dataframe, mapping=p9.aes(x=x_column, y=y_column, color=label_column))
        pplot += p9.scale_color_gradient(low=colors[0], high=colors[1])
    elif coloring_style == 'manual':
        assert len(colors) == len(dataframe[label_column].unique()), "You have chosen per category manual coloring, therefore you have to provide the same number of colors"
        pplot = p9.ggplot(data=dataframe, mapping=p9.aes(x=x_column, y=y_column, color='factor(' + label_column + ')'))
        # The labels are mapped to the `color` aesthetic, so the user's colors
        # must go on the color scale (an alpha scale would never apply them).
        pplot += p9.scale_color_manual(values=colors)

    pplot += p9.geom_point(alpha=alpha)
    pplot += p9.xlab(x_column) + p9.ylab(y_column)

    if log_10_scale:
        pplot += p9.scale_x_log10()

    # Map the supported theme names to their plotnine constructors.
    theme_factories = {
        'gray': p9.theme_gray,
        'dark': p9.theme_dark,
        'seaborn': p9.theme_seaborn,
        'light': p9.theme_light,
    }
    if theme not in theme_factories:
        raise Exception('Theme type not supported, please add.')
    pplot += theme_factories[theme]()

    # Applied after the complete theme so the font size is not reset by it.
    pplot += p9.theme(text=p9.element_text(size=8))

    if save_to_file is not None:
        save_directory, filename = separate_path_and_file(filepath=save_to_file)
        pplot.save(filename=filename, path=save_directory, dpi=dpi)
    else:
        pplot.draw()

    return pplot
Esempio n. 7
0
def plot_violinbox_plots_per_category(
        dataframe: pandas.DataFrame,
        plot_type: str,
        target_feature: str,
        label_column: str,
        colors: List[str],
        coloring_style: str,
        value_skip_list: List = [],
        jitter_alpha: float = 0.7,
        plot_alpha: float = 0.5,
        log_10_scale: bool = False,
        theme: str = 'gray',
        save_to_file: str = None,
        dpi: int = 150,
        show: bool = True
) -> p9.ggplot:
    """
    The :func:`plot_violinbox_plots_per_category` helps with providing the user with nicely plotted violin and
    box plots of the distribution of data points.

    Parameters
    ----------
    dataframe: `pandas.DataFrame`, required
        This is the main parameter that this method is supposed to work with, which is a dataframe that has
        a label column in which we have integer values starting from 0, and a float feature column the distribution
        of which we tend to monitor.
    plot_type: `str`, required
        This value, either `box` or `violin`, determines the type of plot.
    target_feature: `str`, required
        This parameter is the column name of the features that we want to monitor.
    label_column: `str`, required
        The input dataframe must have a label_column (preferably integer starting from 0), the name of that
        column should be input here.
    colors: `List[str]`, required
        For the `manual` coloring style this is a list with one color per distinct label (counted after the
        skipped values are removed); for the `gradient` style it is a list of two colors, the first for the
        low end and the second for the high end.
    coloring_style: `str`, required
        Either `manual` or `gradient` which helps assigning colors to clusters.
    value_skip_list: `List`, optional (default=[])
        If some values in the feature column are to be skipped, they should be put in here so that they
        are ignored in the plots. For example, if for some reason some values are -10000000, they can be taken care
        of in here.
    jitter_alpha: `float`, optional (default=0.7)
        The jitter value transparency is set in this parameter.
    plot_alpha: `float`, optional (default=0.5)
        The transparency intensity can be determined by setting this parameter.
    log_10_scale: `bool`, optional (default=False)
        If True, the value (y) axis is drawn on a base-10 logarithmic scale.
    theme: `str`, optional (default='gray')
        This is the `theme` types, the accepted values are: ``['gray', 'dark', 'seaborn', 'light']``, the values
        are consistent with `plotnine` package's format.
    save_to_file: `str`, optional (default=None)
        If the user intends to save the plot in a file, this parameter should have a value. The value must be a filepath.
    dpi: `int`, optional (default=150)
        The dpi for saving the plots indicating the image quality.
    show: `bool`, optional (default=True)
        Whether or not the plot is to be shown is set in this parameter.

    Returns
    ----------
    The output of this method is of `p9.ggplot` type.

    Raises
    ----------
    AssertionError
        If `coloring_style` is unknown or `colors` has the wrong length for the chosen style.
    Exception
        If `plot_type` or `theme` is not one of the supported values.
    """
    # Fail early with a clear message instead of an unbound `pplot` below.
    assert coloring_style in ['manual', 'gradient'], "invalid coloring style"

    # Always bind `df`, and plot the filtered frame so that the skipped values
    # are really left out of the figure.
    if len(value_skip_list) > 0:
        df = dataframe[~dataframe[target_feature].isin(value_skip_list)]
    else:
        df = dataframe

    label_factor = 'factor(' + label_column + ')'

    if coloring_style == 'gradient':
        assert len(colors) == 2, "you have chosen gradient style coloring, for colors you have to provide a list with the \
            First element being the color for low and the second the color for high."
        pplot = p9.ggplot(data=df, mapping=p9.aes(x=label_factor, y=target_feature, color=label_column))
        pplot += p9.scale_color_gradient(low=colors[0], high=colors[1])
    elif coloring_style == 'manual':
        assert len(colors) == len(df[label_column].unique()), "You have chosen per category manual coloring, therefore you have to provide the same number of colors"
        pplot = p9.ggplot(data=df, mapping=p9.aes(x=label_factor, y=target_feature, color=label_factor))
        # The labels are mapped to the `color` aesthetic, so the user's colors
        # must go on the color scale (an alpha scale would never apply them).
        pplot += p9.scale_color_manual(values=colors)

    pplot += p9.geom_jitter(alpha=jitter_alpha)

    if plot_type == 'box':
        pplot += p9.geom_boxplot(alpha=plot_alpha)
    elif plot_type == 'violin':
        pplot += p9.geom_violin(alpha=plot_alpha)
    else:
        raise Exception('unknown plot type, it must be violin or box.')

    # Map the supported theme names to their plotnine constructors.
    theme_factories = {
        'gray': p9.theme_gray,
        'dark': p9.theme_dark,
        'seaborn': p9.theme_seaborn,
        'light': p9.theme_light,
    }
    if theme not in theme_factories:
        raise Exception('Theme type not supported, please add.')
    pplot += theme_factories[theme]()

    if log_10_scale:
        # The x axis holds the discrete label factor, which cannot take a
        # continuous log scale; the measured values live on the y axis.
        pplot += p9.scale_y_log10()

    if save_to_file is not None:
        save_directory, filename = separate_path_and_file(filepath=save_to_file)
        pplot.save(filename=filename, path=save_directory, dpi=dpi)

    if show:
        pplot.draw()

    return pplot
Esempio n. 8
0
# Y-axis limits: the min and max of the second column of the t-SNE results.
# NOTE(review): `xlim` is used below but is not defined in this part of the
# file -- presumably computed the same way from the first column; confirm.
ylim = [tsne_results_df.iloc[:, 1].min(), tsne_results_df.iloc[:, 1].max()]

# Scatter plot of the first two t-SNE columns, grouped and colored by the
# cluster column, with marginal rugs and one ellipse per group.
plot = (p9.ggplot(tsne_results_df,
                    p9.aes(y=tsne_results_df.columns[1], 
                           x=tsne_results_df.columns[0],
                           group=clusters_colname,
                           color=clusters_colname
                           ))
        + p9.geom_point(size=2)
        + p9.geom_rug()
        + p9.stat_ellipse()
        + p9.xlim(xlim[0], xlim[1])
        + p9.ylim(ylim[0], ylim[1])
        #+ p9.scale_color_gradient(low='blue', high='yellow')
        #+ p9.scale_color_manual(values=colors)
        + p9.theme_light(base_size=18)
        + p9.ggtitle(plot_title)
        + p9.labs(y=y_axis_label,
                  x=x_axis_label)
        )

# Save the figure to disk, then load the saved file back as an IPython image
# (presumably so the notebook shows it as the cell output -- confirm).
plot_filename = 'shap_clusters.png'
plot.save(plot_filename, width=10, height=10)
from IPython.display import Image
Image(filename=plot_filename)

# + [markdown]
'''
The plot above shows our 3 clusters in different colors and outlined by 
    ellipses.  To understand these clusters, we need to look at some statistics
    and/or plots that characterize them.  Let's take a look at the means for
Esempio n. 9
0
    def test_theme_light(self):
        """Check the base plot with `theme_light` applied.

        The themed plot plus `_theme` must compare equal to 'theme_light'
        (presumably the name of a reference image -- confirm against the
        test harness).
        """
        themed = self.g + labs(title='Theme Light') + theme_light()

        assert (themed + _theme) == 'theme_light'
Esempio n. 10
0
from plotnine import theme_light, theme, element_blank, element_text, element_rect

# Standard theme: `theme_light` without any panel grid, and with facet strips
# drawn as black text on a white background.
theme_std = theme_light() + theme(
    panel_grid=element_blank(),
    strip_text=element_text(color='black'),
    strip_background=element_rect(fill='white'),
)
Esempio n. 11
0
def ghozzi_score_plot(prediction_result: pd.DataFrame, filename: str):
    """Plots case counts and detector predictions with ghozzi weighting.

    One facet is drawn per (county, pathogen) combination. Each facet shows
    the case counts as grey bars, overlaid with bars for the weighted true
    positives, false negatives and false positives.

    Parameters
    ----------
    prediction_result
        DataFrame containing 'alarm', 'county', 'pathogen', 'n_cases', 'n_outbreak_cases', 'outbreak'.
        Its index is used as the date axis. The frame is not modified.
    filename
        File name to write the plot to.
    """
    # Work on a copy so the caller's frame does not gain the helper columns.
    # The local name must stay `prediction_result`: the case-count layer below
    # refers to it by name inside an aes expression.
    prediction_result = prediction_result.copy()

    # Outbreaks that were recognized.
    prediction_result["weighted_true_positives"] = (
        prediction_result.alarm * prediction_result.outbreak *
        prediction_result.n_outbreak_cases)

    # Outbreaks that were missed.
    prediction_result["weighted_false_negatives"] = (
        (1 - prediction_result.alarm) * prediction_result.outbreak *
        prediction_result.n_outbreak_cases)

    # Alarms that were falsely raised, each weighted with the mean number of
    # outbreak cases over the rows that are flagged as outbreaks.
    prediction_result["weighted_false_positives"] = (
        prediction_result.alarm *
        (prediction_result.outbreak != prediction_result.alarm) *
        np.mean(prediction_result.query("outbreak").n_outbreak_cases))

    # Long format: one row per (date, weighting kind) for the stacked bars.
    melted_prediction_result = (
        prediction_result.reset_index()
        .rename(columns={"index": "date"})
        .melt(
            id_vars=[
                "date",
                "county",
                "pathogen",
                "n_cases",
                "n_outbreak_cases",
                "outbreak",
                "alarm",
            ],
            var_name="prediction",
            value_name="weighting",
        ))

    case_color = "grey"
    n_cols = 4
    n_filter_combinations = len(
        prediction_result[["county", "pathogen"]].drop_duplicates())
    # Facets are laid out in rows of `n_cols`; a partially filled last row
    # still needs a full row of height, hence the ceiling division.
    n_rows = -(-n_filter_combinations // n_cols)

    chart = (
        gg.ggplot(melted_prediction_result, gg.aes(x="date"))
        + gg.geom_bar(
            prediction_result,
            gg.aes(x="prediction_result.index", y="n_cases"),
            fill=case_color,
            stat="identity",
        )
        + gg.geom_line(gg.aes(y=0), color=case_color)
        + gg.geom_bar(
            gg.aes(y="weighting", fill="prediction"), stat="identity")
        + gg.facet_wrap(["county", "pathogen"], ncol=n_cols)
        + gg.scale_x_date(date_breaks="4 month", date_labels="%Y-%m")
        + gg.ylab("# cases")
        + gg.scale_fill_manual(
            name="weighting", values=["red", "orange", "green"])
        # The complete theme goes first: adding it after the override would
        # replace the override and bring the minor grid back.
        + gg.theme_light()
        + gg.theme(panel_grid_minor=gg.element_blank())
    )
    chart.save(
        filename,
        width=5 * n_cols,
        height=4 * n_rows,
        # plotnine's keyword is `units`; with `unit` the sizes were not
        # interpreted as centimetres.
        units="cm",
        limitsize=False,
    )
Esempio n. 12
0
def plot_bar(data,nuclstr,column='value',factor=None,ymin=None,ymax=None,stat='identity',dpi=300,features=None,feature_types=['all'],add_features=[],funcgroups=None,shading_modes=['charge_functional'],usd=False,right_overhang_fix=None,debug=False,startnumber=1,cropseq=(0,None),aspect_ratio=None,reverse_seq=False,double_seq=False,transparent=True,fill_params=None,bar_position='stack',title=None):
    """
    A wrapper function to make a plot of data with bars along the sequence.

    Input should be a dataframe with resid and segid columns plus the value
    column named by `column` ('value' by default). Only the segid of the first
    row is used to look up the sequence and its features in `nuclstr`.
    This one is inspired by seqplot/seqplot/pdb_plot.py

    Selected parameters:
    factor - optional column name mapped to the bar fill color.
    ymin, ymax - y limits; default to the min/max of the plotted column.
    features - shading features to draw; defaults to nuclstr.shading_features[segid].
    feature_types - feature styles to keep; 'all' keeps every feature.
    add_features - extra features appended after the filtering.
    usd - passed on to geom_seq_x; when set, a ruler is requested on top of
        the sequence image and only ymin is applied as a y-scale limit.
    cropseq - (start, stop) slice applied to the alignment columns.
    reverse_seq, double_seq - reverse the sequence / add the reversed
        sequence as a second alignment row.
    fill_params - keyword arguments for scale_fill_manual.
    title - plot title; defaults to the segid and component type.

    Returns the plotnine plot with the shaded sequence image added.
    """

    segid=data['segid'].values[0]
    component=nuclstr.components[segid]

    if title is None:
        title="Segid: %s, Type: %s"%(segid,component['type'])

    # The original condition (`x is 'DNA' or 'histone' or 'protein'`) was
    # always true; a membership test expresses what was written.
    # NOTE(review): 'DNA' being mapped to generic_protein looks suspicious but
    # matches the previous effective behavior -- confirm the intended alphabet.
    if component['entity'] in ('DNA','histone','protein'):
        alphabet=generic_protein
    else:
        alphabet=generic_dna
    seq=Seq(str(nuclstr.seqs[segid]['fullseq']),alphabet)
    record_id=component['type']+':'+segid
    msar=MultipleSeqAlignment([SeqRecord(seq=seq,id=record_id,name=record_id)])
    if reverse_seq:
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq=msar[0].seq[::-1]

    if double_seq:
        msar.add_sequence('reverse',str(msar[0].seq[::-1]))

    msar=msar[:,cropseq[0]:cropseq[1]]

    #We need to get starting residue, currently for DNA chains only cifseq gets it correctly
    resid_start=nuclstr.seqs[segid]['resid_start']

    # Pass the value as a logging argument with a matching placeholder.
    logger.debug("Starting resid %s",resid_start)

    overhang=nuclstr.seqs[segid]['overhangL']

    # Shift residue ids so that they index positions of the (cropped) sequence.
    datafixed=data.copy()
    datafixed.loc[:,'resid']=datafixed.loc[:,'resid']-resid_start+overhang+1-cropseq[0]

    sl=len(msar[0].seq)

    if features is None:
        fn=nuclstr.shading_features[segid]
    else:
        fn=features
    # Keep only the requested feature styles ('all' keeps everything).
    fn2=[i for i in fn if (i['style'] in feature_types) or ('all' in feature_types)]
    fn2.extend(add_features)

    ruler='top' if usd else None
    shaded=ipyshade.shadedmsa4plot(msar,features=fn2,shading_modes=shading_modes,debug=debug,startnumber=startnumber,setends=[startnumber-2,sl+startnumber+2],funcgroups=funcgroups,ruler=ruler,density=200)

    #If sl%10==0 we will have a ruler number hanging beyond the sequence image, and we need to correct for that.
    if right_overhang_fix is not None:
        rof=right_overhang_fix
    elif sl%10==0:
        rof=0.1 if sl<100 else 0.5
    else:
        rof=0

    if aspect_ratio is not None:
        ar=aspect_ratio
    else:
        ar=0.2*100./sl

    plot=(ggplot(data=datafixed,mapping=aes(x='resid', y=column))
        + scale_x_continuous(limits=(0.5,sl+0.5+rof),expand=(0,0.2),name='',breaks=[])
        + theme_light()+theme(aspect_ratio=ar,dpi=dpi,plot_margin=0,text=element_text(size=6), legend_key_size=5 ,legend_position='bottom',legend_direction='horizontal'))

    if factor is None:
        plot=plot+geom_bar(stat=stat,width=0.5)
    else:
        plot=plot+geom_bar(stat=stat,width=0.5,mapping=aes(fill=factor),position=bar_position)

    if fill_params is not None:
        plot=plot+scale_fill_manual(**fill_params)

    # Only one user-supplied bound is put on the y scale: the upper one in
    # the default orientation, the lower one when `usd` is set.
    if not usd:
        if ymax is not None:
            plot=plot+scale_y_continuous(limits=(None,ymax))
    else:
        if ymin is not None:
            plot=plot+scale_y_continuous(limits=(ymin,None))

    # The sequence image needs concrete y limits; fall back to the data range.
    if ymax is None:
        ymax=data[column].max()
    if ymin is None:
        ymin=data[column].min()

    plot = plot + geom_seq_x(seqimg=shaded.img,\
                   xlim=(1,sl+rof),ylim=(ymin,ymax),usd=usd,aspect_ratio=ar,transparent=transparent)+ggtitle(title)

    return plot
Esempio n. 13
0
def plot_line(data,nuclstr,columns=['value'],ymin=None,ymax=None,dpi=300,features=None,feature_types=['all'],add_features=[],funcgroups=None,shading_modes=['charge_functional'],right_overhang_fix=None,debug=False,startnumber=1,cropseq=(0,None),aspect_ratio=None,reverse_seq=False,transparent=True,xshift=0):
    """
    A wrapper function to make a plot of data with lines (and points) along the sequence.

    Input should be a dataframe with resid and segid columns plus the value
    columns named in `columns` (a single column name is also accepted). Only
    the segid of the first row is used to look up the sequence and its
    features in `nuclstr`. Each value column is drawn as its own colored line.
    This one is inspired by seqplot/seqplot/pdb_plot.py
    funcgroup example fg="\\funcgroup{xxx}{CT}{White}{Green}{upper}{up} \\funcgroup{xxx}{GA}{White}{Blue}{upper}{up}"

    Selected parameters:
    ymin, ymax - y limits for the sequence image; default to the min/max of
        the plotted values. Only ymax is also applied to the y scale.
    features - shading features to draw; defaults to nuclstr.shading_features[segid].
    feature_types - feature styles to keep; 'all' keeps every feature.
    add_features - extra features appended after the filtering.
    cropseq - (start, stop) slice applied to the alignment columns.
    reverse_seq - reverse the sequence before plotting.
    xshift - extra offset added to the shifted residue ids.

    Returns the plotnine plot with the shaded sequence image added.
    """
    if isinstance(columns,str):
        columns=[columns]
    segid=data['segid'].values[0]
    component=nuclstr.components[segid]

    title="Segid: %s, Type: %s"%(segid,component['type'])

    # The original condition (`x is 'DNA' or 'histone' or 'protein'`) was
    # always true; a membership test expresses what was written.
    # NOTE(review): 'DNA' being mapped to generic_protein looks suspicious but
    # matches the previous effective behavior -- confirm the intended alphabet.
    if component['entity'] in ('DNA','histone','protein'):
        alphabet=generic_protein
    else:
        alphabet=generic_dna
    seq=Seq(str(nuclstr.seqs[segid]['fullseq']),alphabet)
    record_id=component['type']+':'+segid
    msar=MultipleSeqAlignment([SeqRecord(seq=seq,id=record_id,name=record_id)])
    if reverse_seq:
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq=msar[0].seq[::-1]

    msar=msar[:,cropseq[0]:cropseq[1]]

    #We need to get starting residue, currently for DNA chains only cifseq gets it correctly
    resid_start=nuclstr.seqs[segid]['resid_start']

    logger.debug("Starting resid %d"%int(resid_start))

    overhang=nuclstr.seqs[segid]['overhangL']

    # Shift residue ids so that they index positions of the (cropped) sequence.
    datafixed=data.copy()
    datafixed.loc[:,'resid']=datafixed.loc[:,'resid']-resid_start+overhang+1-cropseq[0]+xshift

    sl=len(msar[0].seq)

    if features is None:
        fn=nuclstr.shading_features[segid]
    else:
        fn=features
    # Keep only the requested feature styles ('all' keeps everything).
    fn2=[i for i in fn if (i['style'] in feature_types) or ('all' in feature_types)]
    fn2.extend(add_features)

    shaded=ipyshade.shadedmsa4plot(msar,features=fn2,shading_modes=shading_modes,debug=debug,startnumber=startnumber,setends=[startnumber-2,sl+startnumber+2],funcgroups=funcgroups,density=200)

    #If sl%10==0 we will have a ruler number hanging beyond the sequence image, and we need to correct for that.
    if right_overhang_fix is not None:
        rof=right_overhang_fix
    elif sl%10==0:
        rof=0.1 if sl<100 else 0.5
    else:
        rof=0

    if aspect_ratio is not None:
        ar=aspect_ratio
    else:
        ar=0.15*100./sl

    # Long format: one row per (resid, value column) so that each column
    # becomes its own colored series.
    md=pd.melt(datafixed,id_vars=['segid','resid'],value_vars=columns)

    plot=(ggplot(data=md,mapping=aes(x='resid', y='value'))
        + geom_point(aes(color='variable'),size=0.1)+geom_line(aes(color='variable'),stat='identity')
        + scale_x_continuous(limits=(0.5,sl+0.5+rof),expand=(0,0.2),name='',breaks=[])
        + theme_light()+theme(aspect_ratio=ar,dpi=dpi,plot_margin=0))

    if ymax is not None:
        plot=plot+scale_y_continuous(limits=(None,ymax))

    # The sequence image needs concrete y limits; fall back to the data range.
    if ymin is None:
        ymin=md['value'].min()
    if ymax is None:
        ymax=md['value'].max()
    plot = plot + geom_seq_x(seqimg=shaded.img,\
                   xlim=(1,sl+rof),ylim=(ymin,ymax),aspect_ratio=ar,transparent=transparent)+ggtitle(title)

    return plot