Beispiel #1
0
def plot_tune_acc(results):
    '''Build the "Tuning Set Accuracy vs epochs" plot for a results dict.

    The values stored under ``results['tune_acc']`` are drawn with
    ``plot_metric``; the title and the layer returned by
    ``make_baseline_geom(results)`` are then added on top.
    '''
    metric_name = 'Tuning Set Accuracy'
    accuracies = results['tune_acc']

    base_plot = plot_metric(metric_name, accuracies)
    titled_plot = base_plot + ggtitle('{} vs epochs'.format(metric_name))

    return titled_plot + make_baseline_geom(results)
Beispiel #2
0
def plot_metric(metric_name, metric_list):
    '''Generic line plot of a single metric against the epoch axis.

    Arguments
    ---------
    metric_name: string
        Human readable name of the metric; used as the y column name
        and in the plot title
    metric_list: list
        The values of the metric over the epochs

    Returns
    -------
    plot:
        The ggplot object showing the metric as a line over 'epochs',
        titled '<metric_name> vs epochs'
    '''
    title = '{} vs epochs'.format(metric_name)
    frame = create_data_df(metric_name, metric_list)

    canvas = ggplot(frame, aes(x='epochs', y=metric_name))
    return canvas + geom_line() + ggtitle(title)
def histogram(gray_img, mask=None, bins=256, color='red', title=None):
    """Plot a histogram of a grayscale image using ggplot.

    Inputs:
    gray_img = grayscale image to analyze
    mask     = binary mask made from selected contours
    bins     = number of classes to divide spectrum into
    color    = color of the line drawn
    title    = custom title for the plot gets drawn if title is not None

    Side effects: increments params.device and, depending on params.debug,
    saves ("print") or prints ("plot") the figure.

    :param gray_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :param color: str
    :param title: str
    :return fig_hist: ggplot
    """

    params.device += 1

    # Apply mask if one is supplied
    if mask is not None:
        # Debug output is switched off while the helper runs. The finally
        # clause restores the caller's debug mode even when thresholding
        # raises, so the global setting can never be left disabled.
        debug = params.debug
        params.debug = None
        try:
            mask1 = binary_threshold(mask, 0, 255, 'light')
        finally:
            params.debug = debug
        # Dividing by 255 turns mask values of 255 into a factor of 1
        masked = np.multiply(gray_img, mask1 / 255)
    else:
        masked = gray_img

    # 16-bit images span a larger intensity range than 8-bit ones
    if gray_img.dtype == 'uint16':
        maxval = 65536
    else:
        maxval = 256

    # Store histogram data; the range starts at 1, so zero-valued pixels are not counted
    hist_gray_data, hist_bins = np.histogram(masked, bins, (1, maxval))
    # make hist percentage for plotting
    # NOTE(review): if masked contains no non-zero pixel this divides by zero — confirm
    # whether callers can pass such input.
    pixels = cv2.countNonZero(masked)
    hist_percent = (hist_gray_data / float(pixels)) * 100

    bin_labels = np.arange(0, bins)
    dataset = pd.DataFrame({
        'Grayscale pixel intensity': bin_labels,
        'Proportion of pixels (%)': hist_percent
    })

    fig_hist = (ggplot(data=dataset,
                       mapping=aes(x='Grayscale pixel intensity',
                                   y='Proportion of pixels (%)')) +
                geom_line(color=color) +
                scale_x_continuous(breaks=list(range(0, bins, 25))))
    # The title is the only difference between the titled and untitled figure
    if title is not None:
        fig_hist = fig_hist + labels.ggtitle(title)

    if params.debug is not None:
        if params.debug == "print":
            fig_hist.save(
                os.path.join(params.debug_outdir,
                             str(params.device) + '_hist.png'))
        if params.debug == "plot":
            print(fig_hist)

    return fig_hist
Beispiel #4
0
# Month-over-month change of the index within each cn_gg group. Rows containing any
# missing value (which includes each group's first, undefined difference) are dropped.
# `drop` is passed by keyword: positional arguments after `level` were removed from
# DataFrame.reset_index in pandas 2.0.
df_hpi = df_hpi.assign(sidx=lambda x: x.groupby(cn_gg).idx.diff(1)).dropna().reset_index(drop=True)
# Add the calendar year and reduce the monthly change to its sign (-1, 0, +1)
df_hpi = df_hpi.assign(year=lambda x: x.date.dt.year, sidx=lambda x: np.sign(x.sidx).astype(int))

# Count, per group and year, the number of months in each sign category
df_hpi_sidx = df_hpi.pivot_table(index=cn_gg+['year'],columns='sidx',values='idx',aggfunc='count')
# Back to long format; the counts end up in column 'n'
df_hpi_sidx = df_hpi_sidx.fillna(0).astype(int).reset_index().melt(id_vars=cn_gg+['year'],value_name='n')
# Replace the hpi / tt codes via the di_hpi / di_tt lookups
df_hpi_sidx = df_hpi_sidx.assign(hpi=lambda x: x.hpi.map(di_hpi),tt=lambda x: x.tt.map(di_tt))
# Plot the number of negative months
tmp = df_hpi_sidx.query('sidx == -1 & city.isin(@cities)', engine='python').drop(columns='sidx')
gg_hpi_sidx = (ggplot(tmp, aes(x='year',y='n',color='tt')) + 
    geom_point() + geom_line() + theme_bw() + 
    scale_y_continuous(breaks=list(range(1,13,1))) + 
    scale_x_continuous(breaks=list(range(2005,2021,1))) + 
    facet_grid('city~hpi') + 
    theme(axis_title_x=element_blank(),axis_text_x=element_text(angle=90)) + 
    scale_color_discrete(name='Housing type') + 
    ggtitle('Index adjusted for seasonality') + 
    labs(x='Year',y='# of negative months'))
gg_save('gg_hpi_sidx.png', dir_figures, gg_hpi_sidx, 8, 8)

###########################################
# --- (2) QUADRANT STRATEGY (MONTHLY) --- #

# (i) Calculate for stock
mm_stock = pd.concat([df_other[cn_ticker],df_reit[cn_ticker]])
# presumably get_delta adds the 1-period price change as column 'mm' — verify against its definition
mm_stock = get_delta(mm_stock,'price','ticker', 1).dropna()
mm_stock = mm_stock.assign(sidx=lambda x: np.sign(x.mm).astype(int))
mm_stock = mm_stock.drop(columns=['mm','price'])
# Keep tickers with at least 12 observations on or after dmin.
# NOTE(review): selecting ['index'] and filtering on 'ticker' relies on
# value_counts().reset_index() naming its columns 'index'/'ticker' (pandas < 2.0
# behaviour) — confirm against the pandas version this script is pinned to.
ticker12 = mm_stock[mm_stock.date>=dmin].ticker.value_counts().reset_index().query('ticker>=12')['index']
mm_stock = mm_stock[mm_stock.ticker.isin(ticker12)].reset_index(drop=True)
# (ii) Get "any" negative change CREA/Teranet/Housing type
mm_hpi_s = df_hpi.drop(columns=['year','idx']).pivot_table('sidx',['date','city'],['tt','hpi'])
Beispiel #5
0
def plot_bar(data,nuclstr,column='value',factor=None,ymin=None,ymax=None,stat='identity',dpi=300,features=None,feature_types=['all'],add_features=[],funcgroups=None,shading_modes=['charge_functional'],usd=False,right_overhang_fix=None,debug=False,startnumber=1,cropseq=(0,None),aspect_ratio=None,reverse_seq=False,double_seq=False,transparent=True,fill_params=None,bar_position='stack',title=None):
    """
    A wrapper function to make a plot of data with bars along the sequence.
    Input should be a dataframe with resid, segid columns and the column named
    by `column` ('value' by default).
    This one is inspired by seqplot/seqplot/pdb_plot.py

    The segment plotted is the segid of the first row of `data`; its sequence,
    start residue, left overhang and default shading features are read from
    `nuclstr`. Residue numbers are shifted so that they line up with the
    (optionally cropped) sequence image drawn along the x axis.

    Returns the assembled ggplot object.
    """

    segid=data['segid'].values[0]
    component=nuclstr.components[segid]

    if title is None:
        title="Segid: %s, Type: %s"%(segid,component['type'])

    # The original test `entity is 'DNA' or 'histone' or 'protein'` was always truthy
    # (a non-empty string literal is truthy), so generic_dna could never be selected.
    # A membership test evaluates the condition as it was written.
    # NOTE(review): 'DNA' mapping to generic_protein is kept from the original — confirm it is intended.
    alphabet=generic_protein if component['entity'] in ('DNA','histone','protein') else generic_dna
    seq=Seq(str(nuclstr.seqs[segid]['fullseq']),alphabet)
    seq_label=component['type']+':'+segid
    msar=MultipleSeqAlignment([SeqRecord(seq=seq,id=seq_label,name=seq_label)])
    if reverse_seq:
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq=msar[0].seq[::-1]

    if double_seq:
        # Add the reversed sequence as a second row of the alignment
        msar.add_sequence('reverse',str(msar[0].seq[::-1]))

    msar=msar[:,cropseq[0]:cropseq[1]]

    #We need to get starting residue, currently for DNA chains only cifseq gets it correctly
    resid_start=nuclstr.seqs[segid]['resid_start']

    # The value must go through a placeholder; passing it as a bare extra argument
    # makes logging fail to format the record.
    logger.debug("Starting resid %s",resid_start)

    overhang=nuclstr.seqs[segid]['overhangL']

    # Work on a copy so the caller's dataframe is not modified
    datafixed=data.copy()
    datafixed.loc[:,'resid']=datafixed.loc[:,'resid']-resid_start+overhang+1-cropseq[0]

    sl=len(msar[0].seq)

    # Features to shade: the caller's list, or the defaults stored for this segment
    if features is None:
        fn=nuclstr.shading_features[segid]
    else:
        fn=features
    # Keep the requested feature styles ('all' keeps everything), then append extras
    fn2=[i for i in fn if (i['style'] in feature_types) or ('all' in feature_types)]
    fn2.extend(add_features)

    # NOTE(review): usd presumably means "upside down" — it moves the ruler to the top
    # and is forwarded to geom_seq_x; confirm.
    if usd:
        ruler='top'
    else:
        ruler=None
    shaded=ipyshade.shadedmsa4plot(msar,features=fn2,shading_modes=shading_modes,debug=debug,startnumber=startnumber,setends=[startnumber-2,sl+startnumber+2],funcgroups=funcgroups,ruler=ruler,density=200)

    #If sl%10==0 we will have a ruler number hanging beyond the sequence image, and we need to correct for that.
    if right_overhang_fix is not None:
        rof=right_overhang_fix
    elif sl%10==0:
        rof=0.1 if sl<100 else 0.5
    else:
        rof=0

    if aspect_ratio is not None:
        ar=aspect_ratio
    else:
        ar=0.2*100./sl

    plot=(ggplot(data=datafixed,mapping=aes(x='resid', y=column))
        + scale_x_continuous(limits=(0.5,sl+0.5+rof),expand=(0,0.2),name='',breaks=[])
        + theme_light()+theme(aspect_ratio=ar,dpi=dpi,plot_margin=0,text=element_text(size=6), legend_key_size=5 ,legend_position='bottom',legend_direction='horizontal'))

    if factor is None:
        plot=plot+geom_bar(stat=stat,width=0.5)
    else:
        plot=plot+geom_bar(stat=stat,width=0.5,mapping=aes(fill=factor),position=bar_position)

    if fill_params is not None:
        plot=plot+scale_fill_manual(**fill_params)

    # Only one end of the y axis is pinned: the upper limit when usd is False,
    # the lower limit when usd is True.
    if not usd:
        if ymax is not None:
            plot=plot+scale_y_continuous(limits=(None,ymax))
    else:
        if ymin is not None:
            plot=plot+scale_y_continuous(limits=(ymin,None))

    # The sequence image needs concrete y limits; fall back to the data range
    if ymax is None:
        ymax=data[column].max()
    if ymin is None:
        ymin=data[column].min()

    plot = plot + geom_seq_x(seqimg=shaded.img,\
                   xlim=(1,sl+rof),ylim=(ymin,ymax),usd=usd,aspect_ratio=ar,transparent=transparent)+ggtitle(title)

    return plot
Beispiel #6
0
def plot_line(data,nuclstr,columns=['value'],ymin=None,ymax=None,dpi=300,features=None,feature_types=['all'],add_features=[],funcgroups=None,shading_modes=['charge_functional'],right_overhang_fix=None,debug=False,startnumber=1,cropseq=(0,None),aspect_ratio=None,reverse_seq=False,transparent=True,xshift=0):
    """
    A wrapper function to make a plot of data with lines (and points) along the sequence.
    Input should be a dataframe with resid, segid columns and the value column(s)
    named in `columns` (a single column name may be given as a string).
    This one is inspired by seqplot/seqplot/pdb_plot.py
    funcgroup example fg="\\funcgroup{xxx}{CT}{White}{Green}{upper}{up} \\funcgroup{xxx}{GA}{White}{Blue}{upper}{up}"

    The segment plotted is the segid of the first row of `data`; its sequence,
    start residue, left overhang and default shading features are read from
    `nuclstr`. Residue numbers are shifted (plus `xshift`) so that they line up
    with the (optionally cropped) sequence image drawn along the x axis.

    Returns the assembled ggplot object.
    """
    if isinstance(columns,str):
        columns=[columns]
    segid=data['segid'].values[0]
    component=nuclstr.components[segid]

    title="Segid: %s, Type: %s"%(segid,component['type'])

    # The original test `entity is 'DNA' or 'histone' or 'protein'` was always truthy
    # (a non-empty string literal is truthy), so generic_dna could never be selected.
    # A membership test evaluates the condition as it was written.
    # NOTE(review): 'DNA' mapping to generic_protein is kept from the original — confirm it is intended.
    alphabet=generic_protein if component['entity'] in ('DNA','histone','protein') else generic_dna
    seq=Seq(str(nuclstr.seqs[segid]['fullseq']),alphabet)
    seq_label=component['type']+':'+segid
    msar=MultipleSeqAlignment([SeqRecord(seq=seq,id=seq_label,name=seq_label)])
    if reverse_seq:
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq=msar[0].seq[::-1]

    msar=msar[:,cropseq[0]:cropseq[1]]

    #We need to get starting residue, currently for DNA chains only cifseq gets it correctly
    resid_start=nuclstr.seqs[segid]['resid_start']

    # Arguments are handed to logging so the message is only built when emitted
    logger.debug("Starting resid %d",int(resid_start))

    overhang=nuclstr.seqs[segid]['overhangL']

    # Work on a copy so the caller's dataframe is not modified
    datafixed=data.copy()
    datafixed.loc[:,'resid']=datafixed.loc[:,'resid']-resid_start+overhang+1-cropseq[0]+xshift

    sl=len(msar[0].seq)

    # Features to shade: the caller's list, or the defaults stored for this segment
    if features is None:
        fn=nuclstr.shading_features[segid]
    else:
        fn=features
    # Keep the requested feature styles ('all' keeps everything), then append extras
    fn2=[i for i in fn if (i['style'] in feature_types) or ('all' in feature_types)]
    fn2.extend(add_features)
    shaded=ipyshade.shadedmsa4plot(msar,features=fn2,shading_modes=shading_modes,debug=debug,startnumber=startnumber,setends=[startnumber-2,sl+startnumber+2],funcgroups=funcgroups,density=200)

    #If sl%10==0 we will have a ruler number hanging beyond the sequence image, and we need to correct for that.
    if right_overhang_fix is not None:
        rof=right_overhang_fix
    elif sl%10==0:
        rof=0.1 if sl<100 else 0.5
    else:
        rof=0

    if aspect_ratio is not None:
        ar=aspect_ratio
    else:
        ar=0.15*100./sl

    # Long format: one row per (segid, resid, variable) so each column becomes a colored line
    md=pd.melt(datafixed,id_vars=['segid','resid'],value_vars=columns)

    plot=(ggplot(data=md,mapping=aes(x='resid', y='value'))
        + geom_point(aes(color='variable'),size=0.1)+geom_line(aes(color='variable'),stat='identity')
        + scale_x_continuous(limits=(0.5,sl+0.5+rof),expand=(0,0.2),name='',breaks=[])
        + theme_light()+theme(aspect_ratio=ar,dpi=dpi,plot_margin=0))

    # Only the upper end of the y axis is pinned, and only when ymax is given
    if ymax is not None:
        plot=plot+scale_y_continuous(limits=(None,ymax))

    # The sequence image needs concrete y limits; fall back to the data range
    if ymin is None:
        ymin=md['value'].min()
    if ymax is None:
        ymax=md['value'].max()
    plot = plot + geom_seq_x(seqimg=shaded.img,\
                   xlim=(1,sl+rof),ylim=(ymin,ymax),aspect_ratio=ar,transparent=transparent)+ggtitle(title)

    return plot
Beispiel #7
0
def histogram(img,
              mask=None,
              bins=100,
              lower_bound=None,
              upper_bound=None,
              title=None,
              hist_data=False):
    """Plot one intensity histogram line per channel of the input image

    Inputs:
    img            = an RGB or grayscale image to analyze
    mask           = binary mask, calculate histogram from masked area only (default=None)
    bins           = divide the data into n evenly spaced bins (default=100)
    lower_bound    = the lower bound of the bins (x-axis min value) (default=None)
    upper_bound    = the upper bound of the bins (x-axis max value) (default=None)
    title          = a custom title for the plot (default=None)
    hist_data      = return the frequency distribution data if True (default=False)

    Returns:
    fig_hist       = histogram figure
    hist_df        = dataframe with histogram data, with columns "pixel intensity" and "proportion of pixels (%)"
                     (only returned when hist_data is True)

    :param img: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :param lower_bound: int
    :param upper_bound: int
    :param title: str
    :param hist_data: bool
    :return fig_hist: plotnine.ggplot.ggplot
    :return hist_df: pandas.core.frame.DataFrame
    """
    if not isinstance(img, np.ndarray):
        fatal_error("Only image of type numpy.ndarray is supported input!")
    n_dims = len(img.shape)
    if n_dims < 2:
        fatal_error("Input image should be at least a 2d array!")

    # Intensity range of the pixels under consideration (masked area if a mask is given)
    considered = img if mask is None else img[np.where(mask > 0)]
    img_min, img_max = np.nanmin(considered), np.nanmax(considered)

    # Explicit bounds win; otherwise fall back to the observed min / max
    if lower_bound is None:
        lower_bound = img_min
    if upper_bound is None:
        upper_bound = img_max

    if n_dims == 2:
        # Single channel: one histogram, labelled as channel '0'
        bin_labels, hist_percent, hist_ = _hist_gray(img,
                                                     bins=bins,
                                                     lower_bound=lower_bound,
                                                     upper_bound=upper_bound,
                                                     mask=mask)
        hist_df = pd.DataFrame({
            'pixel intensity': bin_labels,
            'proportion of pixels (%)': hist_percent,
            'hist_count': hist_,
            'color channel': ['0'] * len(hist_percent)
        })
    else:
        # Three channels are named blue/green/red, any other count is numbered
        if img.shape[2] == 3:
            channel_names = ['blue', 'green', 'red']
        else:
            channel_names = [str(idx) for idx in range(img.shape[2])]

        # Column accumulators, extended channel by channel
        intensities = np.array([])
        proportions = np.array([])
        counts = np.array([])
        channel_col = []
        for position, label in enumerate(channel_names):
            bin_labels, hist_percent, hist_ = _hist_gray(
                img[:, :, position],
                bins=bins,
                lower_bound=lower_bound,
                upper_bound=upper_bound,
                mask=mask)
            intensities = np.append(intensities, bin_labels)
            proportions = np.append(proportions, hist_percent)
            counts = np.append(counts, hist_)
            channel_col = channel_col + [label] * len(hist_percent)

        hist_df = pd.DataFrame({
            'pixel intensity': intensities,
            'proportion of pixels (%)': proportions,
            'hist_count': counts,
            'color channel': channel_col
        })

    line_mapping = aes(x='pixel intensity',
                       y='proportion of pixels (%)',
                       color='color channel')
    fig_hist = ggplot(data=hist_df, mapping=line_mapping) + geom_line()

    if title is not None:
        fig_hist = fig_hist + labels.ggtitle(title)
    # Draw the three named channels in their own colors
    if n_dims > 2 and img.shape[2] == 3:
        fig_hist = fig_hist + scale_color_manual(['blue', 'green', 'red'])

    # Plot or print the histogram
    debug_file = os.path.join(params.debug_outdir,
                              str(params.device) + '_hist.png')
    _debug(visual=fig_hist, filename=debug_file)

    if hist_data is True:
        return fig_hist, hist_df
    return fig_hist