def plot_tune_acc(results):
    '''Plot the model's tuning set accuracy over time

    Arguments
    ---------
    results: dict
        Training results; the 'tune_acc' entry holds the accuracy values to
        plot. The whole object is also handed to make_baseline_geom.

    Returns
    -------
    plot:
        The plot_metric output with a title and the baseline geom added
    '''
    label = 'Tuning Set Accuracy'
    accuracy_plot = plot_metric(label, results['tune_acc'])

    heading = ggtitle('{} vs epochs'.format(label))
    return accuracy_plot + heading + make_baseline_geom(results)
def plot_metric(metric_name, metric_list):
    '''Generic line plot of one metric against the training epochs

    Arguments
    ---------
    metric_name: string
        The human understandable name of the metric being plotted; it is used
        as the y aesthetic and in the plot title
    metric_list: list
        A list containing the various values of the metric over the epochs

    Returns
    -------
    plot: plotnine.Plot
        A plot showing the change in the metric over the epochs
    '''
    # create_data_df is expected to yield an 'epochs' column plus one column
    # named after the metric -- both are referenced by the aesthetics below.
    frame = create_data_df(metric_name, metric_list)

    axes = aes(x='epochs', y=metric_name)
    heading = ggtitle('{} vs epochs'.format(metric_name))
    return ggplot(frame, axes) + geom_line() + heading
def histogram(gray_img, mask=None, bins=256, color='red', title=None):
    """Plot a histogram using ggplot.

    Inputs:
    gray_img = grayscale image to analyze
    mask     = binary mask made from selected contours
    bins     = number of classes to divide spectrum into
    color    = color of the line drawn
    title    = custom title for the plot gets drawn if title is not None

    :param gray_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :param color: str
    :param title: str
    :return fig_hist: ggplot
    """
    params.device += 1
    saved_debug = params.debug

    if mask is None:
        masked = gray_img
    else:
        # Debug mode is switched off around the binary_threshold call and
        # restored right after the branch.
        params.debug = None
        unit_mask = binary_threshold(mask, 0, 255, 'light') / 255
        masked = np.multiply(gray_img, unit_mask)
    params.debug = saved_debug

    maxval = 65536 if gray_img.dtype == 'uint16' else 256

    # The histogram range starts at 1, so zero-valued pixels are not counted;
    # the percentage is likewise taken over the non-zero pixels only.
    counts, _ = np.histogram(masked, bins, (1, maxval))
    nonzero_pixels = cv2.countNonZero(masked)
    percent = (counts / float(nonzero_pixels)) * 100

    dataset = pd.DataFrame({
        'Grayscale pixel intensity': np.arange(0, bins),
        'Proportion of pixels (%)': percent
    })

    fig_hist = (ggplot(data=dataset,
                       mapping=aes(x='Grayscale pixel intensity',
                                   y='Proportion of pixels (%)'))
                + geom_line(color=color)
                + scale_x_continuous(breaks=list(range(0, bins, 25))))
    if title is not None:
        fig_hist = fig_hist + labels.ggtitle(title)

    # Debug output: save to the debug directory or print the figure
    if params.debug == "print":
        out_path = os.path.join(params.debug_outdir,
                                str(params.device) + '_hist.png')
        fig_hist.save(out_path)
    if params.debug == "plot":
        print(fig_hist)

    return fig_hist
# Period-over-period change of the index within each cn_gg group. Rows with
# missing values (which includes the first row of every group, whose diff is
# NaN) are dropped and the index is renumbered.
# NOTE: `drop` must be passed by keyword -- pandas >= 2.0 rejects positional
# arguments to reset_index other than `level`.
df_hpi = (df_hpi
          .assign(sidx=lambda x: x.groupby(cn_gg).idx.diff(1))
          .dropna()
          .reset_index(drop=True))
# Keep only the sign of the change (-1 / 0 / 1) and tag each row with its year
df_hpi = df_hpi.assign(year=lambda x: x.date.dt.year,
                       sidx=lambda x: np.sign(x.sidx).astype(int))
# Count rows per sign for every group/year, then reshape to long format with
# the counts in column 'n'
df_hpi_sidx = df_hpi.pivot_table(index=cn_gg + ['year'], columns='sidx',
                                 values='idx', aggfunc='count')
df_hpi_sidx = (df_hpi_sidx
               .fillna(0)
               .astype(int)
               .reset_index()
               .melt(id_vars=cn_gg + ['year'], value_name='n'))
df_hpi_sidx = df_hpi_sidx.assign(hpi=lambda x: x.hpi.map(di_hpi),
                                 tt=lambda x: x.tt.map(di_tt))

# Plot the number of negative months
tmp = (df_hpi_sidx
       .query('sidx == -1 & city.isin(@cities)', engine='python')
       .drop(columns='sidx'))
gg_hpi_sidx = (ggplot(tmp, aes(x='year', y='n', color='tt'))
               + geom_point()
               + geom_line()
               + theme_bw()
               + scale_y_continuous(breaks=list(range(1, 13, 1)))
               + scale_x_continuous(breaks=list(range(2005, 2021, 1)))
               + facet_grid('city~hpi')
               + theme(axis_title_x=element_blank(),
                       axis_text_x=element_text(angle=90))
               + scale_color_discrete(name='Housing type')
               + ggtitle('Index adjusted for seasonality')
               + labs(x='Year', y='# of negative months'))
gg_save('gg_hpi_sidx.png', dir_figures, gg_hpi_sidx, 8, 8)

###########################################
# --- (2) QUADRANT STRATEGY (MONTHLY) --- #

# (i) Calculate for stock
mm_stock = pd.concat([df_other[cn_ticker], df_reit[cn_ticker]])
mm_stock = get_delta(mm_stock, 'price', 'ticker', 1).dropna()
mm_stock = mm_stock.assign(sidx=lambda x: np.sign(x.mm).astype(int))
mm_stock = mm_stock.drop(columns=['mm', 'price'])
# Tickers with at least 12 observations on or after dmin. The column names
# are set explicitly ('index' = ticker symbol, 'ticker' = count) because the
# default names produced by value_counts().reset_index() changed in
# pandas 2.0.
ticker_counts = mm_stock[mm_stock.date >= dmin].ticker.value_counts()
ticker12 = (ticker_counts
            .rename_axis('index')
            .reset_index(name='ticker')
            .query('ticker>=12')['index'])
mm_stock = mm_stock[mm_stock.ticker.isin(ticker12)].reset_index(drop=True)

# (ii) Get "any" negative change CREA/Teranet/Housing type
mm_hpi_s = (df_hpi
            .drop(columns=['year', 'idx'])
            .pivot_table(values='sidx', index=['date', 'city'],
                         columns=['tt', 'hpi']))
def plot_bar(data, nuclstr, column='value', factor=None, ymin=None, ymax=None,
             stat='identity', dpi=300, features=None, feature_types=['all'],
             add_features=[], funcgroups=None,
             shading_modes=['charge_functional'], usd=False,
             right_overhang_fix=None, debug=False, startnumber=1,
             cropseq=(0, None), aspect_ratio=None, reverse_seq=False,
             double_seq=False, transparent=True, fill_params=None,
             bar_position='stack', title=None):
    """
    A wrapper function to make a plot of data with bars along the sequence.

    Input should be a dataframe with resid, segid column and 'value'.
    This one is inspired by seqplot/seqplot/pdb_plot.py

    Parameters
    ----------
    data : dataframe with 'segid' and 'resid' columns plus the plotted column;
        the segid of the first row selects the sequence.
    nuclstr : object providing ``components``, ``seqs`` and
        ``shading_features`` mappings keyed by segid.
    column : name of the dataframe column mapped to y.
    factor : optional column mapped to the bar fill colour.
    ymin, ymax : optional y limits; missing values fall back to the data
        min/max when placing the sequence image.
    stat, bar_position : forwarded to ``geom_bar``.
    dpi, aspect_ratio : theme settings; the aspect ratio defaults to
        ``0.2 * 100 / sequence_length``.
    features : shading features; defaults to ``nuclstr.shading_features``.
    feature_types : feature styles to keep ('all' keeps everything).
    add_features : extra features appended after filtering.
    funcgroups, shading_modes, debug, startnumber : forwarded to
        ``ipyshade.shadedmsa4plot``.
    usd : when true the ruler is placed on top and ``usd`` is forwarded to
        ``geom_seq_x``.
    right_overhang_fix : manual widening of the x range; computed when None.
    cropseq : (start, stop) slice applied to the alignment columns.
    reverse_seq : reverse the sequence (experimental).
    double_seq : add the reversed sequence as a second alignment row.
    transparent : forwarded to ``geom_seq_x``.
    fill_params : keyword arguments for ``scale_fill_manual``.
    title : plot title; defaults to "Segid: ..., Type: ...".

    Returns
    -------
    The assembled ggplot object.
    """
    # The list defaults in the signature are never mutated here; they are kept
    # as-is so the signature stays unchanged for callers.
    segid = data['segid'].values[0]
    component = nuclstr.components[segid]
    if title is None:
        title = "Segid: %s, Type: %s" % (segid, component['type'])

    # The previous condition (`entity is 'DNA' or 'histone' or 'protein'`) was
    # always truthy, so every chain got generic_protein. The membership test
    # keeps that result for the three listed entities; other entity values
    # now fall through to generic_dna.
    # NOTE(review): confirm DNA chains are really meant to use generic_protein.
    if component['entity'] in ('DNA', 'histone', 'protein'):
        alphabet = generic_protein
    else:
        alphabet = generic_dna
    seq = Seq(str(nuclstr.seqs[segid]['fullseq']), alphabet)
    record_label = component['type'] + ':' + segid
    msar = MultipleSeqAlignment(
        [SeqRecord(seq=seq, id=record_label, name=record_label)])

    if reverse_seq:
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq = msar[0].seq[::-1]
    if double_seq:
        msar.add_sequence('reverse', str(msar[0].seq[::-1]))
    msar = msar[:, cropseq[0]:cropseq[1]]

    # We need to get starting residue, currently for DNA chains only cifseq
    # gets it correctly
    resid_start = nuclstr.seqs[segid]['resid_start']
    # Lazy %-style argument: the old call passed the value without a
    # placeholder, which makes logging report a formatting error.
    logger.debug("Starting resid %s", resid_start)
    overhang = nuclstr.seqs[segid]['overhangL']

    # Shift residue ids so that they index positions of the (cropped)
    # sequence image, starting at 1.
    datafixed = data.copy()
    datafixed.loc[:, 'resid'] = (datafixed.loc[:, 'resid'] - resid_start
                                 + overhang + 1 - cropseq[0])
    sl = len(msar[0].seq)

    fn = nuclstr.shading_features[segid] if features is None else features
    fn2 = [feat for feat in fn
           if (feat['style'] in feature_types) or ('all' in feature_types)]
    fn2.extend(add_features)

    ruler = 'top' if usd else None
    shaded = ipyshade.shadedmsa4plot(
        msar, features=fn2, shading_modes=shading_modes, debug=debug,
        startnumber=startnumber,
        setends=[startnumber - 2, sl + startnumber + 2],
        funcgroups=funcgroups, ruler=ruler, density=200)

    # If the sequence length is a multiple of 10 a ruler number hangs beyond
    # the sequence image, and the x range has to be widened to correct for it.
    if right_overhang_fix is not None:
        rof = right_overhang_fix
    elif sl % 10 == 0:
        rof = 0.1 if sl < 100 else 0.5
    else:
        rof = 0

    ar = aspect_ratio if aspect_ratio is not None else 0.2 * 100. / sl

    plot = (ggplot(data=datafixed, mapping=aes(x='resid', y=column))
            + scale_x_continuous(limits=(0.5, sl + 0.5 + rof),
                                 expand=(0, 0.2), name='', breaks=[])
            + theme_light()
            + theme(aspect_ratio=ar, dpi=dpi, plot_margin=0,
                    text=element_text(size=6), legend_key_size=5,
                    legend_position='bottom',
                    legend_direction='horizontal'))

    if factor is None:
        plot = plot + geom_bar(stat=stat, width=0.5)
    else:
        plot = plot + geom_bar(stat=stat, width=0.5,
                               mapping=aes(fill=factor),
                               position=bar_position)

    if fill_params is not None:
        plot = plot + scale_fill_manual(**fill_params)

    # NOTE(review): regular plots only pin the upper y limit, upside-down
    # (usd) plots only the lower one -- confirm this pairing is intended.
    if not usd:
        if ymax is not None:
            plot = plot + scale_y_continuous(limits=(None, ymax))
    else:
        if ymin is not None:
            plot = plot + scale_y_continuous(limits=(ymin, None))

    # The sequence image is anchored on the data range of the original
    # (unshifted) dataframe when no explicit limits were given.
    if ymax is None:
        ymax = data[column].max()
    if ymin is None:
        ymin = data[column].min()

    plot = (plot
            + geom_seq_x(seqimg=shaded.img, xlim=(1, sl + rof),
                         ylim=(ymin, ymax), usd=usd, aspect_ratio=ar,
                         transparent=transparent)
            + ggtitle(title))

    return plot
def plot_line(data, nuclstr, columns=['value'], ymin=None, ymax=None, dpi=300,
              features=None, feature_types=['all'], add_features=[],
              funcgroups=None, shading_modes=['charge_functional'],
              right_overhang_fix=None, debug=False, startnumber=1,
              cropseq=(0, None), aspect_ratio=None, reverse_seq=False,
              transparent=True, xshift=0):
    """
    A wrapper function to make a plot of data with lines along the sequence.

    Input should be a dataframe with resid, segid column and the value
    column(s) to plot.
    This one is inspired by seqplot/seqplot/pdb_plot.py

    funcgroup example
    fg="\\funcgroup{xxx}{CT}{White}{Green}{upper}{up} \\funcgroup{xxx}{GA}{White}{Blue}{upper}{up}"

    Parameters
    ----------
    data : dataframe with 'segid' and 'resid' columns plus the plotted
        columns; the segid of the first row selects the sequence.
    nuclstr : object providing ``components``, ``seqs`` and
        ``shading_features`` mappings keyed by segid.
    columns : column name or list of column names; each becomes one coloured
        line after melting.
    ymin, ymax : optional y limits; only ``ymax`` is applied to the y scale,
        both are used (falling back to the data min/max) to place the
        sequence image.
    dpi, aspect_ratio : theme settings; the aspect ratio defaults to
        ``0.15 * 100 / sequence_length``.
    features : shading features; defaults to ``nuclstr.shading_features``.
    feature_types : feature styles to keep ('all' keeps everything).
    add_features : extra features appended after filtering.
    funcgroups, shading_modes, debug, startnumber : forwarded to
        ``ipyshade.shadedmsa4plot``.
    right_overhang_fix : manual widening of the x range; computed when None.
    cropseq : (start, stop) slice applied to the alignment columns.
    reverse_seq : reverse the sequence (experimental).
    transparent : forwarded to ``geom_seq_x``.
    xshift : extra offset added to the residue positions.

    Returns
    -------
    The assembled ggplot object.
    """
    # The list defaults in the signature are never mutated here; they are kept
    # as-is so the signature stays unchanged for callers.
    if isinstance(columns, str):
        columns = [columns]

    segid = data['segid'].values[0]
    component = nuclstr.components[segid]
    title = "Segid: %s, Type: %s" % (segid, component['type'])

    # The previous condition (`entity is 'DNA' or 'histone' or 'protein'`) was
    # always truthy, so every chain got generic_protein. The membership test
    # keeps that result for the three listed entities; other entity values
    # now fall through to generic_dna.
    # NOTE(review): confirm DNA chains are really meant to use generic_protein.
    if component['entity'] in ('DNA', 'histone', 'protein'):
        alphabet = generic_protein
    else:
        alphabet = generic_dna
    seq = Seq(str(nuclstr.seqs[segid]['fullseq']), alphabet)
    record_label = component['type'] + ':' + segid
    msar = MultipleSeqAlignment(
        [SeqRecord(seq=seq, id=record_label, name=record_label)])

    if reverse_seq:
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq = msar[0].seq[::-1]
    msar = msar[:, cropseq[0]:cropseq[1]]

    # We need to get starting residue, currently for DNA chains only cifseq
    # gets it correctly
    resid_start = nuclstr.seqs[segid]['resid_start']
    logger.debug("Starting resid %d", int(resid_start))
    overhang = nuclstr.seqs[segid]['overhangL']

    # Shift residue ids so that they index positions of the (cropped)
    # sequence image, starting at 1, plus the caller-supplied xshift.
    datafixed = data.copy()
    datafixed.loc[:, 'resid'] = (datafixed.loc[:, 'resid'] - resid_start
                                 + overhang + 1 - cropseq[0] + xshift)
    sl = len(msar[0].seq)

    fn = nuclstr.shading_features[segid] if features is None else features
    fn2 = [feat for feat in fn
           if (feat['style'] in feature_types) or ('all' in feature_types)]
    fn2.extend(add_features)

    shaded = ipyshade.shadedmsa4plot(
        msar, features=fn2, shading_modes=shading_modes, debug=debug,
        startnumber=startnumber,
        setends=[startnumber - 2, sl + startnumber + 2],
        funcgroups=funcgroups, density=200)

    # If the sequence length is a multiple of 10 a ruler number hangs beyond
    # the sequence image, and the x range has to be widened to correct for it.
    if right_overhang_fix is not None:
        rof = right_overhang_fix
    elif sl % 10 == 0:
        rof = 0.1 if sl < 100 else 0.5
    else:
        rof = 0

    ar = aspect_ratio if aspect_ratio is not None else 0.15 * 100. / sl

    # Long format: one row per (segid, resid, variable) so that every
    # requested column is drawn as its own coloured series.
    md = pd.melt(datafixed, id_vars=['segid', 'resid'], value_vars=columns)

    plot = (ggplot(data=md, mapping=aes(x='resid', y='value'))
            + geom_point(aes(color='variable'), size=0.1)
            + geom_line(aes(color='variable'), stat='identity')
            + scale_x_continuous(limits=(0.5, sl + 0.5 + rof),
                                 expand=(0, 0.2), name='', breaks=[])
            + theme_light()
            + theme(aspect_ratio=ar, dpi=dpi, plot_margin=0))

    if ymax is not None:
        plot = plot + scale_y_continuous(limits=(None, ymax))

    if ymin is None:
        ymin = md['value'].min()
    if ymax is None:
        ymax = md['value'].max()

    plot = (plot
            + geom_seq_x(seqimg=shaded.img, xlim=(1, sl + rof),
                         ylim=(ymin, ymax), aspect_ratio=ar,
                         transparent=transparent)
            + ggtitle(title))

    return plot
def histogram(img, mask=None, bins=100, lower_bound=None, upper_bound=None, title=None, hist_data=False):
    """Plot histograms of each input image channel

    Inputs:
    img            = an RGB or grayscale image to analyze
    mask           = binary mask, calculate histogram from masked area only (default=None)
    bins           = divide the data into n evenly spaced bins (default=100)
    lower_bound    = the lower bound of the bins (x-axis min value) (default=None)
    upper_bound    = the upper bound of the bins (x-axis max value) (default=None)
    title          = a custom title for the plot (default=None)
    hist_data      = return the frequency distribution data if True (default=False)

    Returns:
    fig_hist       = histogram figure
    hist_df        = dataframe with histogram data, with columns "pixel intensity" and "proportion of pixels (%)"

    :param img: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :param lower_bound: int
    :param upper_bound: int
    :param title: str
    :param hist_data: bool
    :return fig_hist: plotnine.ggplot.ggplot
    :return hist_df: pandas.core.frame.DataFrame
    """
    if not isinstance(img, np.ndarray):
        fatal_error("Only image of type numpy.ndarray is supported input!")
    if len(img.shape) < 2:
        fatal_error("Input image should be at least a 2d array!")

    # Intensity range of the pixels under consideration (masked area only
    # when a mask is supplied)
    considered = img if mask is None else img[np.where(mask > 0)]
    img_min, img_max = np.nanmin(considered), np.nanmax(considered)

    # Explicit bounds win; otherwise fall back to the observed min / max
    if lower_bound is None:
        lower_bound = img_min
    if upper_bound is None:
        upper_bound = img_max

    if len(img.shape) == 2:
        bin_labels, hist_percent, hist_ = _hist_gray(
            img, bins=bins, lower_bound=lower_bound, upper_bound=upper_bound, mask=mask)
        hist_df = pd.DataFrame({
            'pixel intensity': bin_labels,
            'proportion of pixels (%)': hist_percent,
            'hist_count': hist_,
            'color channel': ['0'] * len(hist_percent)
        })
    else:
        # Three channels are labelled blue/green/red, any other channel
        # count is labelled by channel index
        n_channels = img.shape[2]
        if n_channels == 3:
            b_names = ['blue', 'green', 'red']
        else:
            b_names = [str(idx) for idx in range(n_channels)]

        # Per-channel results are accumulated into flat column arrays
        px_int = np.array([])
        prop = np.array([])
        hist_count = np.array([])
        channel = []
        for idx, name in enumerate(b_names):
            bin_labels, hist_percent, hist_ = _hist_gray(
                img[:, :, idx], bins=bins, lower_bound=lower_bound, upper_bound=upper_bound, mask=mask)
            px_int = np.append(px_int, bin_labels)
            prop = np.append(prop, hist_percent)
            hist_count = np.append(hist_count, hist_)
            channel.extend([name] * len(hist_percent))

        hist_df = pd.DataFrame({
            'pixel intensity': px_int,
            'proportion of pixels (%)': prop,
            'hist_count': hist_count,
            'color channel': channel
        })

    line_aes = aes(x='pixel intensity', y='proportion of pixels (%)', color='color channel')
    fig_hist = ggplot(data=hist_df, mapping=line_aes) + geom_line()

    if title is not None:
        fig_hist = fig_hist + labels.ggtitle(title)
    if len(img.shape) > 2 and img.shape[2] == 3:
        fig_hist = fig_hist + scale_color_manual(['blue', 'green', 'red'])

    # Plot or print the histogram
    debug_path = os.path.join(params.debug_outdir, str(params.device) + '_hist.png')
    _debug(visual=fig_hist, filename=debug_path)

    if hist_data is True:
        return fig_hist, hist_df
    return fig_hist