def test_arrow():
    """Draw segments with three arrow configurations and compare to 'arrow'.

    Each layer is shifted by 2, 4 and 6 along x and uses, in turn, the
    default arrow, ``ends='first'`` and ``ends='both'``.
    """
    base = ggplot(df, aes('x', 'y', xend='xend', yend='yend'))

    default_arrow = geom_segment(
        aes('x+2', xend='xend+2'), arrow=arrow(), size=2)
    first_end_arrow = geom_segment(
        aes('x+4', xend='xend+4'), arrow=arrow(ends='first'), size=2)
    both_ends_arrow = geom_segment(
        aes('x+6', xend='xend+6'), arrow=arrow(ends='both'), size=2)

    p = base + default_arrow + first_end_arrow + both_ends_arrow

    # NOTE(review): equality with a string is presumably resolved to a
    # baseline-image comparison by the test harness - confirm.
    assert p == 'arrow'
def test_aesthetics():
    """Map color, linetype, size and alpha on segments; compare to 'aesthetics'.

    The first layer draws the plain segments; every following layer raises
    ``yend`` by one more unit and maps a different aesthetic.
    """
    base = ggplot(df, aes('x', 'y', xend='xend', yend='yend'))
    layers = [
        geom_segment(size=2),
        # Positive slope segments
        geom_segment(aes(yend='yend+1', color='factor(z)'), size=2),
        geom_segment(aes(yend='yend+2', linetype='factor(z)'), size=2),
        geom_segment(aes(yend='yend+3', size='z'), show_legend=False),
        geom_segment(aes(yend='yend+4', alpha='z'), size=2,
                     show_legend=False),
    ]

    p = base
    for layer in layers:
        p = p + layer

    assert p + _theme == 'aesthetics'
def test_arrow():
    """Draw segments with three arrow configurations and compare to 'arrow'.

    Each layer is shifted by 2, 4 and 6 along x and uses, in turn, the
    default arrow, ``ends='first'`` and ``ends='both'``.
    """
    base = ggplot(df, aes('x', 'y', xend='xend', yend='yend'))

    default_arrow = geom_segment(
        aes('x+2', xend='xend+2'), arrow=arrow(), size=2)
    first_end_arrow = geom_segment(
        aes('x+4', xend='xend+4'), arrow=arrow(ends='first'), size=2)
    both_ends_arrow = geom_segment(
        aes('x+6', xend='xend+6'), arrow=arrow(ends='both'), size=2)

    p = base + default_arrow + first_end_arrow + both_ends_arrow

    # NOTE(review): equality with a string is presumably resolved to a
    # baseline-image comparison by the test harness - confirm.
    assert p == 'arrow'
def test_aesthetics():
    """Map color, linetype, size and alpha on segments; compare to 'aesthetics'.

    The first layer draws the plain segments; every following layer raises
    ``yend`` by one more unit and maps a different aesthetic.
    """
    base = ggplot(df, aes('x', 'y', xend='xend', yend='yend'))
    layers = [
        geom_segment(size=2),
        # Positive slope segments
        geom_segment(aes(yend='yend+1', color='factor(z)'), size=2),
        geom_segment(aes(yend='yend+2', linetype='factor(z)'), size=2),
        geom_segment(aes(yend='yend+3', size='z'), show_legend=False),
        geom_segment(aes(yend='yend+4', alpha='z'), size=2,
                     show_legend=False),
    ]

    p = base
    for layer in layers:
        p = p + layer

    assert p + _theme == 'aesthetics'
def plot(solu, k):
    """Build frame ``k`` of the four-bar mechanism animation.

    Prints the frame number, slices ``solu[k:k + 1]`` and draws, in order:
    the links and joints, the node labels, the acceleration arrows (scaled
    by ``ACC_SCALE``) with their magnitudes, a time label, axis labels,
    fixed plot limits (``SCALE_X`` / ``SCALE_Y``) and the black-and-white
    theme.

    Parameters
    ----------
    solu :
        Table sliceable as ``solu[k:k + 1]`` exposing ``Ro4``, ``Ra``,
        ``Rpa``, ``Rba``, ``Aba``, ``Aa``, ``Apaa`` and ``time``, each
        looked up with key ``k``.
        NOTE(review): presumably a pandas DataFrame of complex positions
        and accelerations (``.real`` -> x, ``.imag`` -> y) - confirm.
    k :
        Frame key.

    Returns
    -------
    The assembled ggplot object for this frame.
    """
    print("Frame: ", k)
    sol = solu[k:k + 1]

    # Joint positions and accelerations of this frame, looked up once.
    pos_o4 = sol.Ro4[k]
    pos_a = sol.Ra[k]
    pos_p = sol.Rpa[k]
    pos_b = sol.Rba[k]
    acc_b = sol.Aba[k]
    acc_a = sol.Aa[k]
    acc_p = sol.Apaa[k]

    def link(x, y, xend, yend):
        # Straight bar between two joints.
        return geom_segment(aes(x=x, y=y, xend=xend, yend=yend))

    def joint(x, y):
        # Circle marker on a joint.
        return geom_point(aes(x=x, y=y), shape='o', size=3)

    def node_label(x, y, text):
        return annotate("text", x=x, y=y, label=text)

    def acc_arrow(pos, acc):
        # Red arrow from the joint along its scaled acceleration.
        return geom_segment(
            aes(x=pos.real, y=pos.imag,
                xend=pos.real + acc.real * ACC_SCALE,
                yend=pos.imag + acc.imag * ACC_SCALE),
            colour='red', arrow=arrow())

    def acc_label(x, y, acc):
        # inputting text between '$ $' makes plotnine produce LaTeX text
        return annotate(
            "text", x=x, y=y,
            label=f'${np.absolute(acc)/1000:.2f}~m/s^2$', colour='red')

    layers = [
        # MAIN LINKAGE
        link(0, 0, pos_o4.real, pos_o4.imag),
        joint(0, 0),
        joint(pos_o4.real, pos_o4.imag),
        # 2ND LINKAGE
        link(0, 0, pos_a.real, pos_a.imag),
        joint(pos_a.real, pos_a.imag),
        # AP LINKAGE
        link(pos_a.real, pos_a.imag, pos_p.real, pos_p.imag),
        joint(pos_p.real, pos_p.imag),
        # 3RD LINKAGE
        link(pos_a.real, pos_a.imag, pos_b.real, pos_b.imag),
        joint(pos_b.real, pos_b.imag),
        # 4TH LINKAGE
        link(pos_b.real, pos_b.imag, pos_o4.real, pos_o4.imag),
        joint(pos_b.real, pos_b.imag),
        # NODES IDENTIFICATION
        node_label(0, -20, "$O_1$"),
        node_label(pos_o4.real, pos_o4.imag - 20, "$O_4$"),
        node_label(pos_a.real + 10, pos_a.imag, "$A$"),
        node_label(pos_b.real + 20, pos_b.imag - 10, "$B$"),
        node_label(pos_p.real, pos_p.imag - 40, "$P$"),
        # ACCELERATIONS ARROWS (remove these three entries to hide them)
        acc_arrow(pos_b, acc_b),  # Point B
        acc_arrow(pos_a, acc_a),  # Point A
        acc_arrow(pos_p, acc_p),  # Point P
        # ACCELERATIONS TEXTS (remove these three entries to hide them)
        acc_label(pos_b.real - 30, pos_b.imag + 10, acc_b),
        acc_label(pos_a.real + 20, pos_a.imag - 20, acc_a),
        acc_label(pos_p.real + 10, pos_p.imag + 20, acc_p),
        # TIME IDENTIFICATION
        annotate("label", x=120, y=-80,
                 label=f'Time: ${sol.time[k]:.2f}~s$', alpha=1),
        labs(x='$x~[mm]$', y='$y~[mm]$'),
        # Fixes the plot limits so the frame is not bigger than necessary.
        coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y),
        # Plot is prettier with this theme compared to the default.
        theme_bw(),
    ]

    p = ggplot(sol)
    for layer in layers:
        p = p + layer
    return p
def create_length_plot(len_df, legend_position='right',
                       legend_box='vertical'):
    """Plot per-task density histograms of ``x`` with a mean marker per method.

    Parameters
    ----------
    len_df :
        Table with at least the columns ``Task``, ``Method`` and ``x``.
    legend_position, legend_box :
        Forwarded unchanged to ``theme``.

    Returns
    -------
    The assembled ggplot object.
    """
    # Group means; the constant ' ' column is what the mean marker's
    # linetype is mapped to below.
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    group_means[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_value_text = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        group_means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_marker = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        group_means,
        inherit_aes=False,
        color='black',
    )
    look = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    figure = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_value_text
        + mean_marker
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + look
    )
    return figure
def plot_ROC(label_list, pred_list, names=None, **args):
    """
    Plot several ROC curves on one figure.

    :param label_list: sequence of true-label collections, given as
        [(y1, y2, ...), (y1, y2, ...)], one per entry of pred_list.
    :param pred_list: sequence of predicted-probability collections;
        must have the same length as label_list.
    :param names: model names; None or a sequence as long as label_list.
        When None, two label sets are named ('train', 'test'), three are
        named ('train', 'valid', 'test') and any other count is numbered
        from 0.
    :param args: keyword arguments passed on to sklearn.metrics.roc_curve.
    :return: plotnine plot object.
    """
    if names is None:
        n_sets = len(label_list)
        conventional = {
            2: ('train', 'test'),
            3: ('train', 'valid', 'test'),
        }
        names = conventional.get(n_sets, list(range(n_sets)))

    curves = [
        roc_curve(labels, scores, **args)
        for labels, scores in zip(label_list, pred_list)
    ]

    # Transpose to (all fpr arrays, all tpr arrays, ...) and flatten the
    # first two into one long column each.
    fpr_parts, tpr_parts = list(zip(*curves))[0:2]
    fpr = list(chain.from_iterable(fpr_parts))
    tpr = list(chain.from_iterable(tpr_parts))

    # One model name per point of the corresponding curve.
    models = chain.from_iterable(
        [name] * len(curve[0]) for name, curve in zip(names, curves)
    )
    d_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'model': models})

    diagonal = geom_segment(
        x=0, y=0, xend=1, yend=1, linetype=':', color='grey')

    return (
        ggplot(d_roc, aes(x='fpr', y='tpr', group='model', color='model'))
        + diagonal
        + geom_line()
        + scale_color_discrete(breaks=names)
        + labs(x='false positive rate', y='true positive rate')
        + coord_equal(ratio=1, xlim=[0, 1], ylim=[0, 1])
        + theme_classic()
        + theme(figure_size=(4, 4))
    )
def add_mirna_g(g, df, str_name, str_start, str_end, dis_pos, l_s, l_e,
                l_score=None):
    """Add one annotated track (label, error bars, drop lines, scores) to ``g``.

    Both ``g`` and ``df`` are modified in place and nothing is returned.
    NOTE(review): this relies on ``g += layer`` mutating the caller's plot
    object; confirm ``g`` is a plotnine ``ggplot``.

    Parameters
    ----------
    g :
        Plot object that receives the new layers.
    df :
        Table that receives the columns ``str_start`` and ``str_end`` (and
        ``'score' + str_name`` when scores are given). The layers map
        color to its ``mi_name`` column.
    str_name : str
        Text drawn at ``x=0, y=dis_pos``; also the suffix of the score
        column name.
    str_start, str_end : str
        Names of the columns filled from ``l_s`` and ``l_e``.
    dis_pos :
        y position of the track.
    l_s, l_e :
        Start and end values, one per row.
    l_score : optional
        Scores drawn as text next to each start position, formatted with
        no decimals. Nothing is drawn when empty or omitted.
    """
    df[str_start] = pd.Series(l_s)
    df[str_end] = pd.Series(l_e)

    g += pt.annotate("text", x=0, y=dis_pos, label=str_name)
    g += pt.geom_errorbarh(
        df,
        pt.aes(xmin=str_start, y=dis_pos, xmax=str_end, color='mi_name'))
    # Vertical line from each start position on the track down to y = 0.
    g += pt.geom_segment(
        df,
        pt.aes(x=str_start, y=dis_pos, yend=0, xend=str_start,
               color='mi_name'))

    if l_score:
        score_column_name = 'score' + str_name
        # The builtin float replaces the np.float alias, which NumPy 1.24
        # removed; the resulting dtype is the same.
        df[score_column_name] = (
            pd.Series(l_score, dtype=float).map('{:.0f}'.format))
        g += pt.geom_text(
            df,
            pt.aes(x=str_start, y=dis_pos, label=score_column_name,
                   color='mi_name'),
            nudge_x=0.1, nudge_y=0.1)
def plot_contour(df, var=None, out="out", level="level", aux=False):
    r"""Plot 2d contours

    Draw contour data as line segments. Usually called as a dispatch from
    plot_auto().

    Args:
        df (DataFrame): Contour segment data; must hold the columns
            var[0], var[1] and their "_end" counterparts
        var (array of str): Variables for plot axes
        out (str): Name of output identifier column (mapped to linetype)
        level (str): Name of level identifier column (mapped to color)
        aux (bool): Auxiliary variables present?

    Returns:
        ggplot: Contour image

    Raises:
        ValueError: If var is not given, or if aux is true

    Examples:

        >>> import grama as gr
        >>> from grama.models import make_cantilever_beam

    """
    # Check invariants
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")
    if aux:
        message = (
            "Autoplot plot_contour not designed to handle auxiliary variables. "
            + "Regenerate contour data with fixed auxilary variables, "
            + "or try creating a manual plot."
        )
        raise ValueError(message)

    x_col = var[0]
    y_col = var[1]
    mapping = aes(
        x_col,
        y_col,
        xend=x_col + "_end",
        yend=y_col + "_end",
        linetype=out,
        color=level,
    )

    # `+` binds tighter than `>>`: the finished plot spec is built first
    # and the data is then piped into it.
    return df >> ggplot() + geom_segment(mapping)
def plot_calibration(label_list, pred_list, names=None, **args):
    """
    Plot several calibration curves on one figure.

    :param label_list: sequence of true-label collections, given as
        [(y1, y2, ...), (y1, y2, ...)], one per entry of pred_list.
    :param pred_list: sequence of predicted-probability collections;
        must have the same length as label_list.
    :param names: model names; None or a sequence as long as label_list.
        When None, one label set is named ('model',), two are named
        ('train', 'test'), three are named ('train', 'valid', 'test') and
        any other count is numbered from 0.
    :param args: keyword arguments passed on to calibration_curve.
        ``strategy`` defaults to 'quantile' and ``n_bins`` to 10 when not
        supplied; anything else (e.g. ``normalize``) is forwarded as-is.
    :return: plotnine plot object.

    TODO: choose a sensible display range when the input data is heavily
    skewed.
    """
    if names is None:
        n_sets = len(label_list)
        conventional = {
            1: ('model',),
            2: ('train', 'test'),
            3: ('train', 'valid', 'test'),
        }
        names = conventional.get(n_sets, list(range(n_sets)))

    # `args` comes from ** and is therefore always a dict (never None), so
    # defaults are only filled in for keys the caller did not pass.
    args.setdefault('strategy', 'quantile')
    args.setdefault('n_bins', 10)

    calib = [
        calibration_curve(labels, scores, **args)
        for labels, scores in zip(label_list, pred_list)
    ]

    # calibration_curve returns (fraction of positives, mean prediction);
    # flatten each across all models into one long column.
    frac, pred = tuple(
        [list(chain.from_iterable(part)) for part in zip(*calib)][0:2])

    # One model name per bin of the corresponding curve.
    models = list(chain.from_iterable(
        [name] * len(curve[0]) for name, curve in zip(names, calib)
    ))
    d_calib = pd.DataFrame({'pred': pred, 'frac': frac, 'model': models})

    # Reference diagonal from (0, 0) to (1, 1).
    diagonal = geom_segment(
        x=0, y=0, xend=1, yend=1, linetype=':', color='grey')

    return (
        ggplot(d_calib,
               aes(x='pred', y='frac', group='model', color='model'))
        + diagonal
        + geom_line()
        + geom_point()
        + scale_color_discrete(breaks=names)
        + labs(x='mean estimated probability', y='fraction of positives')
        + coord_equal(ratio=1)
        + theme_classic()
        + theme(figure_size=(4, 4))
    )
def lollipop(data):
    """Draw a horizontal lollipop chart of ``probability`` per ``label``.

    Parameters
    ----------
    data :
        Table with the columns ``label`` and ``probability``.

    Returns
    -------
    tuple
        ``(p, figdata_png)``: the plot object and the rendered PNG as a
        base64-encoded string.
    """
    data = data.sort_values(by=['probability']).reset_index(drop=True)
    # A categorical whose category order is the sorted row order, so the
    # axis follows ascending probability.
    custom_order = pd.Categorical(data['label'], categories=data.label)
    data = data.assign(label_custom=custom_order)

    p = (
        ggplot(data, aes('label_custom', 'probability'))
        + geom_point(color="#88aa88", size=4)
        + geom_segment(
            aes(x='label_custom', y=0,
                xend='label_custom', yend='probability'),
            color="#88aa88")
        + coord_flip(expand=True)
        + theme_minimal()
        + labs(x="", y="probability", title="Most Likely Object")
        + guides(title_position="left")
        + theme(plot_title=element_text(size=20, face="bold", ha="right"))
    )

    fig = p.draw()
    try:
        with BytesIO() as figfile:
            # Save the figure draw() returned rather than whatever pyplot
            # currently considers the active figure.
            fig.savefig(figfile, format='png', bbox_inches='tight')
            figdata_png = base64.b64encode(figfile.getvalue()).decode()
    finally:
        # Release the figure so repeated calls do not accumulate open
        # matplotlib figures.
        plt.close(fig)

    return p, figdata_png
def plot_cor(df):
    """Plot each column pair's correlation with a band from lower to upper.

    Parameters
    ----------
    df :
        Table with the columns ``col_1``, ``col_2``, ``corr``, ``lower``,
        ``upper`` and ``p_value``.

    Returns
    -------
    The assembled plotnine plot (axes flipped).
    """
    # drop missing correlations
    out = df[df['corr'].notnull()]
    # add pair column
    out = out.assign(pair=out.col_1 + '&' + out.col_2)
    # add a sign column
    out['sign'] = [
        'Positive' if value > 0 else 'Negative' for value in out['corr']
    ]
    # add ind column: counts down from the number of rows to 1
    n_rows = out.shape[0]
    out['ind'] = list(range(n_rows, 0, -1))

    # Each band spans 0.4 either side of the row's position.
    band_start = out.ind.values - 0.4
    band_end = out.ind.values + 0.4
    corr_values = out['corr'].values
    # 'b' when p_value <= 0.05, grey otherwise.
    band_fill = ['#abaeb3' if p > 0.05 else 'b' for p in out.p_value]

    zero_line = p9.geom_hline(
        yintercept=0, linetype="dashed", color="#c2c6cc")
    bands = p9.geom_rect(
        alpha=0.4,
        xmin=band_start,
        xmax=band_end,
        ymin=out.lower.values,
        ymax=out.upper.values,
        fill=band_fill,
    )
    estimates = p9.geom_segment(
        x=band_start, y=corr_values, xend=band_end, yend=corr_values)

    # plot using bands
    ggplt = (
        p9.ggplot(data=out, mapping=p9.aes(x='pair', y='corr'))
        + zero_line
        + bands
        + estimates
        + p9.coord_flip()
        + p9.ylim(np.min(out.lower.values), np.max(out.upper.values))
        + p9.labs(x="", y="Correlation")
    )
    return ggplt
def create_length_plot(len_df, legend_position='right',
                       legend_box='vertical'):
    """Plot per-task density histograms of ``x`` with a mean marker per method.

    Parameters
    ----------
    len_df :
        Table with at least the columns ``Task``, ``Method`` and ``x``.
    legend_position, legend_box :
        Forwarded unchanged to ``theme``.

    Returns
    -------
    The assembled ggplot object.
    """
    # Group means; the constant ' ' column is what the mean marker's
    # linetype is mapped to below.
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    group_means[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_value_text = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        group_means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_marker = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        group_means,
        inherit_aes=False,
        color='black',
    )
    look = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    figure = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_value_text
        + mean_marker
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + look
    )
    return figure
def scatter_cell_cycle(
        adata,
        scores="signatures",
        size=1.5,
        alpha=1,
        curvature_shrink=1,
        lab_ypos=2,
):
    """Plots cell cycle signatures vs pseudotime

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.cell_cycle_phase`.
    scores: str
        What to plot as cell cycle scores against pseudotime. If
        'signatures', the S-phase, G2-M and Histones signatures are used;
        if 'components', the 4 cell cycle related components are used.
    size: float
        Controls the point size of the plot.
    alpha: float
        A value between 0 and 1. Controls point transparency.
    curvature_shrink: float
        Divisor applied to the z-scored curvature before it is drawn.
    lab_ypos: float
        Controls the y-axis position of the cell cycle phase annotation.

    Returns
    --------------
    A plotnine scatter plot of pseudotime vs the selected scores. When
    `adata.uns["scycle"]` holds a "cell_cycle_division" entry, the
    curvature trace, its peaks and the phase boundaries are added.

    Raises
    --------------
    ValueError
        If `scores` is neither 'signatures' nor 'components'.

    Notes
    --------------
    The curvature table stored in `adata.uns` is updated in place: its
    "curvature" column is replaced by the rescaled values and a
    "signature" column is added.
    """
    if scores == "signatures":
        y = ["S-phase", "G2-M", "Histones"]
        colors = ["#66c2a5", "#fc8d62", "#8da0cb", "black"]
    elif scores == "components":
        _add_compScores(adata)
        y = ["G1/S comp", "G2/M+ comp", "G2/M- comp", "Histones comp"]
        colors = ["#66c2a5", "#fc8d62", "#8da0cb", "#e5c494", "black"]
    else:
        # Fail early with a clear message instead of an unbound-variable
        # error further down.
        raise ValueError(
            "scores must be 'signatures' or 'components', got "
            + repr(scores))

    time_scatter = (
        scatter_pseudotime(adata, y=y, size=size, alpha=alpha)
        + labs(x="Pseudotime", y="Signature scores", color="Signature"))

    if "cell_cycle_division" not in adata.uns["scycle"]:
        return time_scatter

    # -- Add cell cycle annotations
    cc_divs = adata.uns["scycle"]["cell_cycle_division"]

    # -- Curvature data: z-score, shrink, then shift so the maximum is 0
    curv_data = cc_divs["curvature"]
    curv = curv_data["curvature"].values
    cvz = zscore(curv) / curvature_shrink
    cvz = cvz - np.max(cvz)
    curv_data.loc[:, "curvature"] = cvz
    curv_data.loc[:, "signature"] = "Curvature"

    # -- Peak data (for segments); an explicit copy so adding a column
    # does not write through a filtered slice
    gr_min = np.min(curv_data["curvature"])
    pk_data = curv_data[curv_data["ispeak"] == "peak"].copy()
    pk_data["ymin"] = gr_min

    # -- Cell cycle annotation: phase start lines and label positions
    # (each label sits midway between consecutive boundaries on [0, 1])
    s_start = cc_divs["s_start"]
    g2_start = cc_divs["g2_start"]
    m_start = cc_divs["m_start"]
    cc_phase = pd.DataFrame(
        dict(
            starts=[None, s_start, g2_start, m_start],
            labels=["G1", "S", "G2", "M"],
            labpos=[
                np.mean([0, s_start]),
                np.mean([s_start, g2_start]),
                np.mean([g2_start, m_start]),
                np.mean([m_start, 1]),
            ],
            y=lab_ypos,
        ))

    cell_cycle_plt = (
        time_scatter
        + geom_point(aes("pseudotime", "curvature", color="signature"),
                     data=curv_data)
        + geom_line(aes("pseudotime", "curvature"), data=curv_data)
        + scale_color_manual(values=colors)
        + geom_segment(
            aes(x="pseudotime", xend="pseudotime",
                y="ymin", yend="curvature"),
            linetype="dotted",
            data=pk_data,
        )
        + geom_vline(aes(xintercept="starts"), linetype="dashed",
                     data=cc_phase)
        + geom_text(aes(x="labpos", y="y", label="labels"), data=cc_phase))
    return cell_cycle_plt
def _pg_edge_colour(PGCol, node_1, node_2):
    """Return the colour label of the edge joining two nodes."""
    col_1 = PGCol[node_1]
    col_2 = PGCol[node_2]
    # An endpoint labelled "None" overrides the same/multi distinction.
    if col_1 == "None" or col_2 == "None":
        return "ElPiG None"
    if col_1 == col_2:
        return "ElPiG" + str(col_1)
    return "ElPiG Multi"


def _pg_edge_frame(TargetPG, node_coords, PGCol, Idx1, Idx2):
    """Build the segment table (x, y, xend, yend, Col) of the graph edges."""
    has_colours = len(PGCol) > 0
    rows = dict(x=[], y=[], xend=[], yend=[], Col=[])
    for edge in TargetPG['Edges'][0]:
        node_1 = edge[0]
        node_2 = edge[1]
        rows['x'].append(node_coords[node_1, Idx1])
        rows['y'].append(node_coords[node_1, Idx2])
        rows['xend'].append(node_coords[node_2, Idx1])
        rows['yend'].append(node_coords[node_2, Idx2])
        if has_colours:
            rows['Col'].append(_pg_edge_colour(PGCol, node_1, node_2))
        else:
            rows['Col'].append(1)
    return pd.DataFrame.from_dict(rows)


def PlotPG(X,
           TargetPG,
           BootPG=None,
           PGCol="",
           PlotProjections="none",
           GroupsLab=None,
           PointViz="points",
           Main='',
           p_alpha=.3,
           PointSize=None,
           NodeLabels=None,
           LabMult=1,
           Do_PCA=True,
           DimToPlot=(0, 1),
           VizMode=("Target", "Boot")):
    '''Plot data and a principal graph.

    Work in progress: only basic plotting is supported. ``BootPG``,
    ``Main``, ``NodeLabels`` and ``LabMult`` are accepted but not used yet
    (bootstrapped graphs, node labels, axis labels and the title remain to
    be ported from the R implementation).

    Parameters
    ----------
    X : numerical 2D matrix
        The n-by-m matrix with the position of n m-dimensional points.
    TargetPG : dict
        The main principal graph to plot. ``TargetPG['NodePositions']``
        and ``TargetPG['Edges']`` are read.
    BootPG : list, optional
        Principal graphs to be treated as bootstrapped curves (unused).
    PGCol : str or sequence
        Label(s) for the nodes of the main principal graph. A single
        string, or a sequence of length 1, is repeated for every node. An
        empty value disables edge labels.
    PlotProjections : str
        "none" (no projections), "onNodes" (segments from each point to
        its associated node) or "onEdges" (segments from each point to its
        projection on the graph).
    GroupsLab : sequence, optional
        A category or numeric value for each data point. Defaults to
        "N/A" for every point.
    PointViz : str
        'points' (data drawn as dots) or 'density' (data drawn as a
        density field).
    Main : str
        Title of the plot (unused).
    p_alpha : float
        Alpha value of the points, between 0 and 1. Lower values produce
        more transparent points.
    PointSize : number or sequence, optional
        Size associated with each node of the graph. A scalar, or a
        sequence of length 1, is repeated for every node.
    NodeLabels : sequence, optional
        Label for each node of the graph (unused).
    LabMult : numeric
        Multiplier for the size of node labels (unused).
    Do_PCA : bool
        If True, the nodes of the graph are used to derive principal
        components and both nodes and data are rotated into that space;
        if False, the original dimensions are used.
    DimToPlot : sequence of int
        The PCs (if Do_PCA) or dimensions (otherwise) to plot. One plot is
        produced for every pair, so three indices give three plots.
    VizMode : sequence of str
        Which graphs to draw; only "Target" has an effect.

    Returns
    -------
    list
        One plotnine plot per pair of entries in ``DimToPlot``.

    Raises
    ------
    ValueError
        If ``PointViz`` is neither 'points' nor 'density'.
    '''
    n_nodes = len(TargetPG['NodePositions'])

    # A single label stands for every node. A bare string counts as one
    # label whatever its length; a length-1 sequence contributes its only
    # element.
    if isinstance(PGCol, str):
        PGCol = [PGCol] * n_nodes if PGCol else []
    elif len(PGCol) == 1:
        PGCol = list(PGCol) * n_nodes

    if GroupsLab is None:
        GroupsLab = ["N/A"] * len(X)

    if PointSize is not None:
        if np.ndim(PointSize) == 0:
            PointSize = [PointSize] * n_nodes
        elif len(PointSize) == 1:
            PointSize = list(PointSize) * n_nodes

    if Do_PCA:
        # Perform PCA on the nodes
        mv = TargetPG['NodePositions'].mean(axis=0)
        data_centered = TargetPG['NodePositions'] - mv
        vglobal, NodesPCA, _ = PCA(data_centered)
        # Rotate the data using eigenvectors
        BaseData = np.dot((X - mv), vglobal)
    else:
        NodesPCA = TargetPG['NodePositions']
        BaseData = X

    AllComb = list(combinations(DimToPlot, 2))

    # The point-to-node partition (and the projection onto the graph) does
    # not depend on which pair of dimensions is drawn: compute it once.
    Partition = None
    X_projected = None
    if AllComb and PlotProjections in ("onEdges", "onNodes"):
        Partition = PartitionData(
            X=BaseData,
            NodePositions=NodesPCA,
            MaxBlockSize=100000000,
            SquaredX=np.sum(BaseData**2, axis=1, keepdims=1),
            TrimmingRadius=float('inf'))[0]
        if PlotProjections == "onEdges":
            X_projected = project_point_onto_graph(
                X=BaseData,
                NodePositions=NodesPCA,
                Edges=TargetPG['Edges'],
                Partition=Partition)['X_projected']

    # NOTE(review): the segment layers below keep the `col` aesthetic name
    # from the R original; confirm plotnine treats it as `color`.
    PlotList = list()
    for Idx1, Idx2 in AllComb:
        # Base data
        df1 = pd.DataFrame.from_dict(
            dict(PCA=BaseData[:, Idx1],
                 PCB=BaseData[:, Idx2],
                 Group=GroupsLab))

        # Initialize plot
        if PointViz == "points":
            p = (plotnine.ggplot(data=df1,
                                 mapping=plotnine.aes(x='PCA', y='PCB'))
                 + plotnine.geom_point(
                     alpha=p_alpha,
                     mapping=plotnine.aes(color='Group')))
        elif PointViz == "density":
            p = (plotnine.ggplot(data=df1,
                                 mapping=plotnine.aes(x='PCA', y='PCB'))
                 + plotnine.stat_density_2d(
                     contour=True,
                     alpha=.5,
                     geom='polygon',
                     mapping=plotnine.aes(fill='..level..')))
        else:
            raise ValueError("Invalid point representation selected")

        # Target graph
        df2 = _pg_edge_frame(TargetPG, NodesPCA, PGCol, Idx1, Idx2)

        # TODO: bootstrapped graphs (BootPG with "Boot" in VizMode) are
        # not ported from the R implementation yet.

        # Plot projections
        if PlotProjections == "onEdges":
            ProjDF = pd.DataFrame.from_dict(
                dict(X=BaseData[:, Idx1],
                     Y=BaseData[:, Idx2],
                     Xend=X_projected[:, Idx1],
                     Yend=X_projected[:, Idx2],
                     Group=GroupsLab))
            p = p + plotnine.geom_segment(
                data=ProjDF,
                mapping=plotnine.aes(
                    x='X', y='Y', xend='Xend', yend='Yend', col='Group'),
                inherit_aes=False)
        elif PlotProjections == "onNodes":
            ProjDF = pd.DataFrame.from_dict(
                dict(X=BaseData[:, Idx1],
                     Y=BaseData[:, Idx2],
                     Xend=NodesPCA[Partition, Idx1],
                     Yend=NodesPCA[Partition, Idx2],
                     Group=GroupsLab))
            p = p + plotnine.geom_segment(
                data=ProjDF,
                mapping=plotnine.aes(
                    x='X', y='Y', xend='Xend', yend='Yend', col='Group'),
                inherit_aes=False,
                alpha=.3)

        if "Target" in VizMode:
            # Edges of the graph
            p = p + plotnine.geom_segment(
                data=df2,
                mapping=plotnine.aes(
                    x='x', y='y', xend='xend', yend='yend', col='Col'),
                inherit_aes=True) + plotnine.labs(linetype="")

            # Nodes of the graph
            df4 = pd.DataFrame.from_dict(
                dict(PCA=NodesPCA[:, Idx1], PCB=NodesPCA[:, Idx2]))
            if PointSize is not None:
                p = p + plotnine.geom_point(
                    mapping=plotnine.aes(x='PCA', y='PCB', size=PointSize),
                    data=df4,
                    inherit_aes=False)
            else:
                p = p + plotnine.geom_point(
                    mapping=plotnine.aes(x='PCA', y='PCB'),
                    data=df4,
                    inherit_aes=False)

        # TODO: node labels (NodeLabels / LabMult), the explained-variance
        # axis labels and the title (Main, FVE / FVEP) are not ported from
        # the R implementation yet.

        PlotList.append(p)

    return PlotList
def cell_cycle_scores(adata,
                      scores="signatures",
                      size=1.5,
                      alpha=1,
                      curvature_shrink=1,
                      lab_ypos=2,
                      show_curvature=True):
    """Plots cell cycle signatures vs pseudotime

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.cell_cycle_phase`.
    scores: str
        What to plot as cell cycle scores against pseudotime. If
        'signatures', the G1-S, G2-M and Histones signatures are used; if
        'components', the 4 cell cycle related components are used.
    size: float
        Controls the point size of the plot.
    alpha: float
        A value between 0 and 1. Controls point transparency.
    curvature_shrink: float
        Divisor applied to the z-scored curvature before it is drawn.
    lab_ypos: float
        Controls the y-axis position of the cell cycle phase annotation
        (forwarded to `pseudotime_scatter`).
    show_curvature: bool
        Controls whether curvature is shown.

    Returns
    --------------
    A plotnine scatter plot of pseudotime vs the selected scores. The
    curvature trace and its peaks are added when `show_curvature` is true
    and `adata.uns["scycle"]` holds a "cell_cycle_division" entry.

    Raises
    --------------
    ValueError
        If `scores` is neither 'signatures' nor 'components'.

    Notes
    --------------
    When curvature is drawn, the curvature table stored in `adata.uns` is
    updated in place: its "curvature" column is replaced by the rescaled
    values and a "signature" column is added.
    """
    if scores == "signatures":
        y = ["G1-S", "G2-M", "Histones"]
        colors = ['#8ca0c9', '#ff8d68', '#5cc2a6', "black"]
    elif scores == "components":
        _add_compScores(adata)
        y = ["G1-S comp", "G2-M comp", "G2-M- comp", "Histone comp"]
        colors = ['#8ca0c9', '#ff8d68', "#e5c494", '#5cc2a6', "black"]
    else:
        # Fail early with a clear message instead of an unbound-variable
        # error further down.
        raise ValueError(
            "scores must be 'signatures' or 'components', got "
            + repr(scores))

    time_scatter = (
        pseudotime_scatter(
            adata, y=y, facet=False, size=size, alpha=alpha,
            lab_ypos=lab_ypos)
        + labs(x="Pseudotime", y="Signature scores", color="Signature"))

    draw_curvature = (
        ("cell_cycle_division" in adata.uns["scycle"]) and show_curvature)
    if not draw_curvature:
        # The last colour is reserved for the curvature trace.
        return time_scatter + scale_color_manual(values=colors[0:-1])

    # -- Add cell cycle annotations
    cc_divs = adata.uns["scycle"]["cell_cycle_division"]

    # -- Curvature data: z-score, shrink, then shift so the maximum is 0
    curv_data = cc_divs["curvature"]
    curv = curv_data["curvature"].values
    cvz = zscore(curv) / curvature_shrink
    cvz = cvz - np.max(cvz)
    curv_data.loc[:, "curvature"] = cvz
    curv_data.loc[:, "signature"] = "Curvature"

    # -- Peak data (for segments); an explicit copy so adding a column
    # does not write through a filtered slice
    gr_min = np.min(curv_data["curvature"])
    pk_data = curv_data[curv_data["ispeak"] == "peak"].copy()
    pk_data["ymin"] = gr_min

    cell_cycle_plt = (
        time_scatter
        + geom_point(aes("pseudotime", "curvature", color="signature"),
                     data=curv_data)
        + geom_line(aes("pseudotime", "curvature"), data=curv_data)
        + scale_color_manual(values=colors)
        + geom_segment(
            aes(x="pseudotime", xend="pseudotime",
                y="ymin", yend="curvature"),
            linetype="dotted",
            data=pk_data,
        ))
    return cell_cycle_plt
full_plot_df.head() plot_df = (full_plot_df.sort_values( "odds_ratio", ascending=False).head(subset).append( full_plot_df.sort_values("odds_ratio", ascending=False).iloc[:-2].tail( subset)).replace("rna", "RNA").assign( odds_ratio=lambda x: x.odds_ratio.apply(lambda x: np.log2(x)), lower_odds=lambda x: x.lower_odds.apply(lambda x: np.log2(x)), upper_odds=lambda x: x.upper_odds.apply(lambda x: np.log2(x)), )) plot_df.head() g = (p9.ggplot( plot_df, p9.aes(y="lemma", x="lower_odds", xend="upper_odds", yend="lemma")) + p9.geom_segment(color="#253494", size=6, alpha=0.7) + p9.scale_y_discrete(limits=( plot_df.sort_values("odds_ratio", ascending=True).lemma.tolist())) + p9.scale_x_continuous(limits=(-3, 3)) + p9.geom_vline(p9.aes(xintercept=0), linetype="--", color="grey") + p9.annotate( "segment", x=0.5, xend=2.5, y=1.5, yend=1.5, colour="black", size=0.5, alpha=1, arrow=p9.arrow(length=0.1), ) + p9.annotate(
def gene_profile(genes: list, weights: pd.DataFrame, stddev: pd.DataFrame=None,
                 y_axis_label: str=None, highlight_n: int=None,
                 highlight_anno: list=None, figsize: tuple=None,
                 ylim: tuple=None) -> p9.ggplot:
    """
    Plot the weight of each annotation for one or more genes, one facet
    per gene, with the annotations ordered by weight.

    Parameters
    ----------
    genes : a single str or list of genes to include in plot as facets
        (matched case-insensitively against the index of `weights`)
    weights : DataFrame of ES weights (genes as index, annotations as columns)
    stddev : DataFrame, optional (default: None)
        Same layout as `weights`. When given, error bars of
        weight +/- stddev are drawn.
    y_axis_label : str, optional (default: "Expression Specificity")
    highlight_n : number of highest ESw to highlight
        (defaults to 5 when neither highlight option is given)
    highlight_anno : specific annotations to highlight
        (takes precedence over `highlight_n`)
    figsize : (float, float), optional (default: None)
        Specify width and height of plot.
        Defaults to width 15 and height 5 per matched gene.
    ylim : (float, float), optional (default: (-1, 1))
        Limits of the y-axis.

    Returns
    -------
    g : ggplot

    Raises
    ------
    AssertionError
        If none of `genes` is found in `weights` (or in `stddev` when
        it is given).

    Todo:
        * find a better way for sorting cell-types along x-axis
        * report if gene in genes is not found in df
        * report if duplicate genes
        * replace hacky x-axis labelling

    """
    ### Reduce dataframe to genes of interest
    # A bare string would otherwise be iterated character by character.
    if isinstance(genes, str):
        genes = [genes]
    genes = [str.upper(s) for s in genes]
    idx = np.char.upper(weights.index.values.astype(str))
    mask = np.isin(idx, genes)
    df_tidy = weights[mask]
    n_genes = len(df_tidy)
    assert (n_genes >= 1), "No matching genes found in dataframe."

    stddev_tidy = None
    if stddev is not None:
        idx = np.char.upper(stddev.index.values.astype(str))
        mask = np.isin(idx, genes)
        stddev_tidy = stddev[mask]
        # Check the stddev selection itself, not the weights selection again.
        assert (len(stddev_tidy) >= 1), "No matching genes found in stddev dataframe."

    # Constants, height and width of plot.
    if figsize is None:
        H = 5*n_genes
        W = 15
    else:
        W, H = figsize

    if ylim is None:
        ylim = (-1,1)

    if y_axis_label is None:
        y_axis_label = "Expression Specificity"

    ### Convert to tidy / long format if necessary
    # Org:
    #       ABC  ACBG  ACMB
    # POMC  0.0  0.5   0.9
    # AGRP  0.2  0.0   0.0
    # LEPR  0.1  0.1   0.4

    # Tidy:
    #   gene_name  annotation  es_weight
    # 1 POMC       ABC         0.0
    # 2 AGRP       ABC         0.6
    # 3 LEPR       ABC         1.0
    df_tidy.index.name = None # ensure that index name is none, so "index" is used for id_vars
    df_tidy = pd.melt(df_tidy.reset_index(), id_vars="index", var_name="annotation", value_name="weight")

    if stddev_tidy is not None:
        stddev_tidy.index.name = None
        stddev_tidy = pd.melt(stddev_tidy.reset_index(), id_vars="index", var_name="annotation", value_name="stddev")
        df_tidy = df_tidy.merge(stddev_tidy, on=["index", "annotation"])

    ### Sort values by gene_name and es_weight and add order
    # Sorted:
    #   gene_name  annotation  es_weight  x_order
    # 1 AGRP       MOL2        0.0        1
    # 2 AGRP       ACNT1       0.1        2
    # 3 AGRP       MOL1        0.2        3
    df_tidy = df_tidy.sort_values(by=["index", "weight"])
    df_tidy["order"] = np.arange(len(df_tidy)) + 1

    ### Generate highlight
    # Default: highlight top 5
    if ((highlight_n is None) and (highlight_anno is None)):
        highlight_n = 5

    # highlight list of annotations, else the top-n weights of each gene
    if (highlight_anno is not None):
        df_tidy["highlight"] = df_tidy["annotation"].isin(highlight_anno)
    elif (highlight_n is not None):
        df_tidy["highlight"] = df_tidy.groupby("index")["order"].rank(method="first", ascending=False) <= highlight_n
    else:
        df_tidy["highlight"] = np.array([False] * len(df_tidy))

    df_highlight = df_tidy[df_tidy["highlight"]]

    ### Plot
    # linear function to compute x_axis text-size.
    # Mainly depends on number of genes in df per facet, i.e. len(df_tidy) / len(genes).
    SIZE_TEXT_X_AXIS = 10.161 - 0.023 * (len(df_tidy) / len(genes))

    # Limits of the order for each index gene / facet, e.g. [0, 266, 531]
    # These limits are necessary to only plot the labels
    order_lims = [0, *(df_tidy.groupby("index")["order"].max().values)]

    def find_nearest(array,value):
        # Return the element of `array` closest to `value`.
        array = np.asarray(array)
        idx = (np.abs(array - value)).argmin()
        return array[idx]

    def getbreaks(lims):
        # function defined for use in debugging
        # Snap the facet's axis limits to the nearest per-gene order limits.
        l = find_nearest(order_lims, lims[0])
        r = find_nearest(order_lims, lims[1])
        breaks = np.arange(l, r)
        return breaks

    def getlbls(idx):
        # function defined for use in debugging
        # NOTE(review): `order` is 1-based while `iloc` is 0-based, so the
        # label at break b is that of the row with order b + 1 - confirm
        # whether this shift is intended.
        lbls = df_tidy["annotation"].iloc[idx].values
        return lbls

    p = (
        ### data
        p9.ggplot(data=df_tidy, mapping=p9.aes(x="order", y="weight", label="annotation"))

        ### theming
        + p9.theme_classic()
        + p9.theme(
            figure_size = (W,H),
            axis_ticks_major_x = p9.element_blank(),
            axis_text_x = p9.element_text(rotation=75, hjust=0, size=SIZE_TEXT_X_AXIS),
            # axis_text_y = p9.element_text(size=W),
            panel_spacing = 1,
            strip_background = p9.element_blank()
        )
        + p9.ylim(ylim[0],ylim[1])
        + p9.labs(
            x="", # e.g. "Cell-type"
            y=y_axis_label, # e.g. "ES weight"
        )

        ### viz
        # all
        + p9.geom_segment(mapping=p9.aes(x="order", xend="order", y=0, yend="weight"),
                          color="grey",
                          alpha=0.3,
                          show_legend=False
        )
        + p9.geom_point(mapping=p9.aes(size=2),
                        color="grey",
                        show_legend=False
        )
        # highlight
        + p9.geom_point(data=df_highlight,
                        mapping=p9.aes(size=2),
                        color="dodgerblue",
                        show_legend=False
        )
        + p9.geom_segment(data=df_highlight,
                          mapping=p9.aes(x="order", xend="order", y=0, yend="weight"),
                          color="dodgerblue",
                          alpha=0.3,
                          show_legend=False
        )
        + p9.facet_wrap("index",
                        scales="free",
                        nrow=n_genes
        )
        + p9.scale_x_continuous(
            # order_scale is continuous across all annotations
            # so the scale will look weird for each facet, e.g.
            # facet 1 may have order 1-7, and facet 2 has order 8-14.
            # therefore we must use a labeller function to get the
            # correct labels for each interval of order.
            breaks = lambda lims: getbreaks(lims),
            labels = lambda idx: getlbls(idx)
        )
    )

    if stddev_tidy is not None:
        p = p + p9.geom_errorbar(mapping=p9.aes(ymin="weight-stddev", ymax="weight+stddev"),
                                 color="grey",
                                 width=0.1)\
              + p9.geom_errorbar(data=df_highlight,
                                 mapping=p9.aes(ymin="weight-stddev", ymax="weight+stddev"),
                                 color="dodgerblue",
                                 width=0.1)

    # add labels last for them to be on top
    p = p + p9.geom_label(data=df_highlight,
                          color = "dodgerblue",
                          adjust_text = {'expand_points': (2,2)}
    )

    return p
'theta3c': mech.theta3[1], 'theta4a': mech.theta4[0], 'theta4c': mech.theta4[1], 'omega3a': mech.omega3[0], 'omega3c': mech.omega3[1], 'omega4a': mech.omega4[0], 'omega4c': mech.omega4[1], 'alpha3a': mech.alpha3[0], 'alpha3c': mech.alpha3[1], 'alpha4a': mech.alpha4[0], 'alpha4c': mech.alpha4[1]}, index = [0]) k = 0 plot = ( ggplot(sol) + # MAIN LINKAGE geom_segment(aes(x = 0, y = 0, xend = sol.Ro4[k].real, yend = sol.Ro4[k].imag)) + geom_point(aes(x=0, y=0), shape = 'o', size = 3) + geom_point(aes(x = sol.Ro4[k].real, y = sol.Ro4[k].imag), shape = 'o', size = 3) + # 2ND LINKAGE geom_segment(aes(x = 0, y = 0, xend = sol.Ra[k].real, yend = sol.Ra[k].imag)) + geom_point(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag), shape = 'o', size = 3) + # AP LINKAGE geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag, xend = sol.Rpa[k].real, yend = sol.Rpa[k].imag)) + geom_point(aes(x = sol.Rpa[k].real, y = sol.Rpa[k].imag), shape = 'o', size = 3) + # 3RD LINKAGE geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag, xend = sol.Rba[k].real, yend = sol.Rba[k].imag)) + geom_point(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag), shape = 'o', size = 3) + # 4TH LINKAGE geom_segment(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag, xend = sol.Ro4[k].real, yend = sol.Ro4[k].imag)) + geom_point(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag), shape = 'o', size = 3) + # NODES IDENTIFICATION
def cli():
    """Command-line entry point of GAP (Git Activity Predictor).

    Parses the command-line arguments, collects the authoring dates of the
    commits found in the given repositories (optionally merging author
    identities through a mapping file), fits a survival function on the
    durations derived from each author's activity and, for each requested
    probability, reports the corresponding quantile. Results are printed
    as text, csv or json, or exported as a plot.

    The process exits with status 1 if a repository cannot be opened. It
    also exits (after printing a message) when no author has enough
    observations and recent activity.
    """
    parser = argparse.ArgumentParser(
        description='GAP - Git Activity Predictor')
    parser.add_argument('paths',
                        metavar='PATH',
                        type=str,
                        nargs='*',
                        default=['.'],
                        help='Paths to one or more git repositories')
    parser.add_argument(
        '--date',
        type=lambda d: dateutil.parser.parse(d).date(),
        required=False,
        default=datetime.date.today(),
        help='Date used for predictions (default to current date)')
    parser.add_argument('--obs',
                        type=int,
                        required=False,
                        default=20,
                        help='Number of observations to consider')
    parser.add_argument('--probs',
                        metavar='PROB',
                        type=float,
                        nargs='*',
                        required=False,
                        default=[0.5, 0.6, 0.7, 0.8, 0.9],
                        help='Probabilities to output, strictly in [0,1].')
    parser.add_argument(
        '--limit',
        type=int,
        required=False,
        default=30,
        help=
        'Limit contributors to the one that were active at least once during the last x days (default 30)'
    )
    parser.add_argument(
        '--mapping',
        type=str,
        nargs='?',
        help=
        'Mapping file to merge identities. This file must be a csv file where each line contains two values: the name to be merged, and the corresponding identity. Use "IGNORE" as identity to ignore specific names.'
    )
    parser.add_argument('--branches',
                        metavar='BRANCH',
                        type=str,
                        nargs='*',
                        default=list(),
                        help='Git branches to analyse (default to all).')
    parser.add_argument(
        '--as-dates',
        dest='as_dates',
        action='store_true',
        help=
        'Express predictions using dates instead of time differences in days')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('--text',
                       action='store_true',
                       help='Print results as text.')
    group.add_argument('--csv',
                       action='store_true',
                       help='Print results as csv.')
    group.add_argument('--json',
                       action='store_true',
                       help='Print results as json.')
    group.add_argument(
        '--plot',
        nargs='?',
        const=True,
        help='Export results to a plot. Filepath can be optionaly specified.')

    args = parser.parse_args()

    # Default plot location
    if args.plot is True:
        args.plot = str(args.date) + '.pdf'

    # Default to text if no other option is provided
    if not args.csv and not args.json and not args.plot:
        args.text = True

    # Identity mapping
    if args.mapping:
        d = pandas.read_csv(args.mapping, names=['source', 'target'])
        mapping = {r.source: r.target for r in d.itertuples()}
    else:
        mapping = {}

    raw_data = dict()  # author -> dates of activity

    # Get data from git
    for path in args.paths:
        try:
            repo = git.Repo(path)
        except Exception as e:  # Must be refined
            print('Unable to access repository {} ({}:{})'.format(
                path, e.__class__.__name__, e))
            # This is a failure: report it through a non-zero exit status.
            sys.exit(1)

        # Default branches
        if not args.branches:
            commits = repo.iter_commits('--all')
        else:
            # Pass the branches as separate revision arguments; joining
            # them into one space-separated string yields a single
            # (invalid) revision as soon as two branches are given.
            commits = repo.iter_commits(args.branches)

        for commit in commits:
            try:
                author = commit.author.name
                identity = mapping.get(author, author)
                # Skip authors mapped to "IGNORE", unless the author is
                # literally named that way.
                if author.lower() != 'ignore' and identity.lower() == 'ignore':
                    continue

                date = datetime.date.fromtimestamp(commit.authored_date)
                raw_data.setdefault(identity, []).append(date)
            except Exception as e:
                # Best effort: report the unreadable commit and carry on.
                print('Unable to read commit ({}: {}): {}'.format(
                    e.__class__.__name__, e, commit))

    # Compute durations and apply model
    data = []  # (author, past activities, predicted durations)
    for author, commits in raw_data.items():
        # Only activity up to the prediction date is taken into account.
        commits = sorted([e for e in commits if e <= args.date])
        durations = dates_to_duration(commits, window_size=args.obs)

        if len(durations) >= args.obs:
            # Currently implemented with no censor
            surv = SurvfuncRight(durations, [1] * len(durations))
            predictions = [surv.quantile(p) for p in args.probs]
            last_day = commits[-1]

            # Keep only authors active within the last `--limit` days.
            if last_day >= args.date - datetime.timedelta(args.limit):
                data.append((
                    author,
                    commits,
                    predictions,
                ))

    # Prepare dataframe
    # Authors come from the keys of `raw_data`, hence are already unique.
    df = pandas.DataFrame(index=[a for a, _, _ in data],
                          columns=['last'] + args.probs)
    if len(df) == 0:
        print(
            'No author has {} observations and was active at least once during the last {} days'
            .format(args.obs, args.limit))
        sys.exit()

    df.index.name = 'author'

    if not args.plot:
        for author, commits, predictions in data:
            last = commits[-1]
            if args.as_dates:
                df.at[author, 'last'] = last
            else:
                df.at[author, 'last'] = (last - args.date).days

            for prob, p in zip(args.probs, predictions):
                predicted = last + datetime.timedelta(days=int(p))
                if args.as_dates:
                    df.at[author, prob] = predicted
                else:
                    df.at[author, prob] = (predicted - args.date).days

        # Most recently active first, then by increasing predictions.
        df = df.sort_values(['last'] + args.probs,
                            ascending=[False] + [True] * len(args.probs))
        df = df.astype(str)

        if args.text:
            pandas.set_option('expand_frame_repr', False)
            pandas.set_option('display.max_columns', 999)
            print(df)
        elif args.csv:
            print(df.to_csv())
        elif args.json:
            print(df.to_json(orient='index'))
    else:
        # Because of plotnine's way of initializing matplotlib
        import warnings
        warnings.filterwarnings("ignore")

        VIEW_LIMIT = 28

        # List of (author, day) where day is a delta w.r.t. given date
        activities = []
        # List of (author, from_day, to_day, p) where probability p
        # applies between from_day and to_day (delta w.r.t. given date)
        forecasts = []

        for author, commits, predictions in data:
            last = (commits[-1] - args.date).days
            for e in commits:
                activities.append((author, (e - args.date).days))

            previous = previous_previous = 0
            for d, p in zip(predictions, args.probs):
                if d > previous:
                    forecasts.append((author, last + previous, last + d, p))
                    previous_previous = previous
                    previous = d
                else:
                    # Quantile not beyond the previous one: reuse the start
                    # of the previous segment. Identical segments are
                    # de-duplicated below, keeping the highest probability.
                    forecasts.append(
                        (author, last + previous_previous, last + d, p))

        activities = pandas.DataFrame(columns=['author', 'day'],
                                      data=activities)
        forecasts = pandas.DataFrame(columns=['author', 'fromd', 'tod', 'p'],
                                     data=forecasts)

        plot = (p9.ggplot(p9.aes(y='author')) + p9.geom_segment(
            p9.aes('day - 0.5', 'author', xend='day + 0.5', yend='author'),
            data=activities,
            size=4,
            color='orange',
        ) + p9.geom_segment(
            p9.aes('fromd + 0.5',
                   'author',
                   xend='tod + 0.5',
                   yend='author',
                   alpha='factor(p)'),
            data=forecasts.sort_values('p').drop_duplicates(
                ['author', 'fromd', 'tod'], keep='last'),
            size=4,
            color='steelblue',
        ) + p9.geom_vline(
            xintercept=0, color='r', alpha=0.5, linetype='dashed') +
                p9.scale_x_continuous(
                    name=' << past days {:^20} future days >>'.format(
                        str(args.date)),
                    breaks=range(-VIEW_LIMIT // 7 * 7,
                                 (VIEW_LIMIT // 7 * 7) + 1, 7),
                    minor_breaks=6) +
                p9.scale_y_discrete(
                    name='',
                    limits=activities.sort_values(
                        'day', ascending=False)['author'].unique()) +
                p9.scale_alpha_discrete(range=(0.2, 1), name=' ') +
                p9.coord_cartesian(xlim=(-VIEW_LIMIT, VIEW_LIMIT)) +
                p9.theme_matplotlib() + p9.theme(
                    figure_size=(6, 4 * activities['author'].nunique() / 15)))

        fig = plot.draw()
        fig.savefig(args.plot, bbox_inches='tight')
        print('Plot exported to {}'.format(args.plot))