def plot_MA(df, core=None, pool=None, file='image.pdf', title="plotMA",
            c_up='#ff9896', c_not='black', c_down='#aec7e8',
            min_logfc=1.0, max_fdr=0.05):
    """Draw an MA plot (logFC against average logCPM) and save it to ``file``.

    Rows of ``df`` are split into three groups, each drawn in its own colour:
    up-regulated, down-regulated and not differentially expressed. A row is
    differentially expressed when ``FDR <= max_fdr`` and
    ``|logFC| >= min_logfc``; the sign of ``logFC`` gives the direction.

    Args:
        df: DataFrame providing the columns ``'FDR'``, ``'logFC'`` and
            ``'logCPM'``.
        core: Unused by this function; accepted for backward compatibility.
        pool: Unused by this function; accepted for backward compatibility.
        file: Output path handed to ``ensurePathExists`` and ``fig.savefig``.
        title: Axes title.
        c_up: Colour of the up-regulated points.
        c_not: Colour of the non-significant points.
        c_down: Colour of the down-regulated points.
        min_logfc: Minimum absolute logFC of a differentially expressed row.
            The default ``1.0`` equals the previously hard-coded ``log2(2)``.
        max_fdr: Maximum FDR of a differentially expressed row.

    Returns:
        None. The figure is written to ``file`` and left open.
    """
    s = 5
    lw = 0
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 3))

    # Divide data into DGE blocks. Row-wise masks (instead of index-label
    # membership) keep the three groups a true partition of the rows even
    # when the index contains repeated labels.
    significant = (df['FDR'] <= max_fdr) & (df['logFC'].abs() >= min_logfc)
    is_up = significant & (df['logFC'] >= 0)
    # Strictly negative so that a row can never be both "up" and "down",
    # whatever value of min_logfc the caller chooses.
    is_down = significant & (df['logFC'] < 0)
    dfU = df.loc[is_up, :]
    dfD = df.loc[is_down, :]
    dfN = df.loc[~(is_up | is_down), :]

    # Counts
    n_up, n_down, n_not = len(dfU), len(dfD), len(dfN)
    print("Up : {rest:d} rest".format(rest=n_up))
    print("Down: {rest:d} rest".format(rest=n_down))
    print("Not : {rest:d} rest".format(rest=n_not))

    # Plot: significant points sit above (zorder 3) the smaller background
    # points (zorder 2).
    ax.scatter(dfU['logCPM'], dfU['logFC'], c=c_up, s=s, lw=lw, marker='o', zorder=3, rasterized=True)
    ax.scatter(dfD['logCPM'], dfD['logFC'], c=c_down, s=s, lw=lw, marker='o', zorder=3, rasterized=True)
    ax.scatter(dfN['logCPM'], dfN['logFC'], c=c_not, s=s / 3, lw=lw, marker='o', zorder=2, rasterized=True)

    # Guide lines at the logFC threshold (y = -1, 0, +1 with the defaults).
    ax.axhline(y=-min_logfc, color='b', lw=1, linestyle='--', zorder=5)
    ax.axhline(y=0, color='gray', lw=1, linestyle='--', zorder=5)
    ax.axhline(y=+min_logfc, color='b', lw=1, linestyle='--', zorder=5)

    ax.set_xlim(-1, 18)
    ax.set_ylim(-15, 15)

    # Labels
    ax.set_title(title)
    ax.set_ylabel('logFC')
    ax.set_xlabel('Average logCPM')

    # Layout
    plt.subplots_adjust(left=0.17, bottom=0.17, right=0.97, top=0.90)

    # Save
    ensurePathExists(file)
    fig.savefig(file, dpi=300)
def plot(df, species, phase, figsize=(4, 3)):
    """Save a box plot with one box per column of ``df``.

    Args:
        df: DataFrame whose columns are drawn as boxes, in column order; the
            column names become the (vertical) x tick labels.
        species: Species code; must be one of ``'HS'``, ``'MM'`` or ``'DM'``.
            It is also used in the output file name.
        phase: Phase name used in the output file name.
        figsize: Figure size in inches.

    Raises:
        KeyError: If ``species`` is not one of the known codes.
    """
    fig, ax = plt.subplots(figsize=figsize)

    # The colour is not used by the drawing code below, but the lookup is
    # kept because it rejects unknown species codes with a KeyError.
    species_palette = {'HS': '#2ca02c', 'MM': '#7f7f7f', 'DM': '#ff7f0e'}
    color = species_palette[species]

    sample_names = df.columns
    positions = list(range(len(sample_names)))

    median_style = {'color': 'black', 'lw': 1}
    ax.boxplot(
        df.values,
        positions=positions,
        meanline=True,
        notch=False,
        widths=0.6,
        showfliers=False,
        medianprops=median_style,
        zorder=4,
    )

    ax.set_xticks(positions)
    ax.set_xticklabels(sample_names, rotation=90)
    ax.set_ylabel('Read counts (L2 norm)')

    # tight_layout first, then widen the bottom margin for the rotated
    # tick labels.
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.32, top=0.90)

    wIMGfile = 'images/phase-reads/img-{species:s}-meiotic-{phase:s}.pdf'.format(species=species, phase=phase)
    ensurePathExists(wIMGfile)
    fig.savefig(wIMGfile, dpi=150)
] id_gene_MM = [ dict_MM_id_gene_to_id_string[n] for n in id_string_MM if n in dict_MM_id_gene_to_id_string ] id_gene_DM = [ dict_DM_id_gene_to_id_string[n] for n in id_string_DM if n in dict_DM_id_gene_to_id_string ] # only ids already in graph id_gene_HS = [n for n in id_gene_HS if n in set_all_node_ids] id_gene_MM = [n for n in id_gene_MM if n in set_all_node_ids] id_gene_DM = [n for n in id_gene_DM if n in set_all_node_ids] # all pairs for each pairwise product all_pairs = chain(product(*[id_gene_HS, id_gene_MM]), product(*[id_gene_HS, id_gene_DM]), product(*[id_gene_MM, id_gene_DM])) cross_edges.extend(all_pairs) G.add_edges_from(cross_edges, type='cross') ## # Export ## print('Exporting') wGfile_gpickle = 'results/network/net-{network:s}.gpickle'.format( network=network) ensurePathExists(wGfile_gpickle) nx.write_gpickle(G, wGfile_gpickle) print('Done.')
columns = ['id_string_HS', 'id_string_MM', 'id_string_DM'] for column in columns: df[column] = df[column].apply(lambda x: ",".join([str(y) for y in x])) columns = [ 'HS_CyteGonia', 'MM_CyteGonia', 'DM_MiddleApical', 'HS_TidCyte', 'MM_TidCyte', 'DM_BasalMiddle', 'biotype_HS', 'biotype_MM', 'biotype_DM', 'id_gene_HS', 'id_gene_MM', 'id_gene_DM', 'gene_HS', 'gene_MM', 'gene_DM' ] # Export print("> Exporting") wCSVFile = 'results/pipeline-{pipeline:s}/meta_meiotic_genes.csv'.format( pipeline=pipeline) ensurePathExists(wCSVFile) df.to_csv(wCSVFile) # HS wCSVFileHS = 'results/pipeline-{pipeline:s}/HS_meiotic_genes.csv'.format( pipeline=pipeline) ensurePathExists(wCSVFileHS) df_HS.to_csv(wCSVFileHS) # MM wCSVFileMM = 'results/pipeline-{pipeline:s}/MM_meiotic_genes.csv'.format( pipeline=pipeline) ensurePathExists(wCSVFileMM) df_MM.to_csv(wCSVFileMM) wCSVFileDM = 'results/pipeline-{pipeline:s}/DM_meiotic_genes.csv'.format(
def plot_goea(df, celltype='spermatocyte', species='HS', facecolor='red', ns='BP'):
    """Save a horizontal bar chart of the ten strongest GO enrichment terms.

    Only rows of namespace ``ns`` with ``depth >= 5`` are considered. Bars
    have length ``1 - log(p_fdr_bh)`` and are annotated with the term name
    (cut to 75 characters); the y tick labels are the ``'GO'`` identifiers.

    Args:
        df: DataFrame with the columns ``'NS'``, ``'depth'``, ``'p_fdr_bh'``,
            ``'name'`` and ``'GO'``. The caller's frame is not modified.
        celltype: Used in the log message and the output file name.
        species: Key into the module-level ``dict_species``; also part of the
            output file name.
        facecolor: Bar colour.
        ns: Namespace to plot; key into the module-level ``dict_ns``.

    Returns:
        None. Nothing is drawn when no row survives the filters.
    """
    max_label_len = 75  # string slice

    def shorten(label):
        # Long term names are cut and marked with a trailing '..'.
        if len(label) > max_label_len:
            return label[0:max_label_len] + '..'
        return label

    table = df.copy()
    # Select the namespace and trim shallow terms.
    keep = (table['NS'] == ns) & (table['depth'] >= 5)
    table = table.loc[keep, :]
    # All zeros are set to the smallest computable float so log() is finite.
    table.loc[table['p_fdr_bh'] == 0.0, 'p_fdr_bh'] = np.nextafter(0, 1)
    table['1-log(p)'] = 1 - (np.log(table['p_fdr_bh']))

    print('Plotting GOEA Bars: {celltype:s} {species} {ns:s}'.format(celltype=celltype, species=species, ns=ns))
    species_str = dict_species[species]
    ns_str = dict_ns[ns]

    # Ten largest scores, re-sorted ascending so the strongest bar ends up
    # at the top of the horizontal chart.
    ranked = table.sort_values('1-log(p)', ascending=False)
    top = ranked.iloc[:10, :].sort_values('1-log(p)', ascending=True)
    top['name'] = top['name'].apply(shorten)

    if len(top) == 0:
        print('No significant GOs.')
        return None

    # Plot
    fig, ax = plt.subplots(figsize=(4.7, 3.0))
    title = 'GO enrichment - {species:s} {ns:s}'.format(species=species_str, ns=ns_str)
    ind = np.arange(0, len(top), 1)
    bars = ax.barh(ind, 1 - np.log(top['p_fdr_bh']), height=0.8, facecolor=facecolor, zorder=4)
    ax.set_title(title, fontsize='large')

    # Term names are written inside the bars, slightly right of their start.
    x_hi = ax.get_xlim()[1]
    for patch, label in zip(bars.patches, top['name'].tolist()):
        ax.text(
            x=patch.get_x() + (0.01 * x_hi),
            y=(patch.get_y() + (patch.get_height() / 2)),
            s=label,
            ha='left',
            va='center',
            fontsize='x-small',
            zorder=5,
        )

    # Reference line at p = 0.05.
    ax.axvline(x=(1 - math.log(0.05)), color='#c7c7c7', ls='dashed')

    ax.set_yticks(ind)
    ax.set_yticklabels(top['GO'])
    ax.set_xlabel(r'$1 - $log($p$-value)')
    # Fixed room for ten bars, however many are actually drawn.
    ax.set_ylim(-0.7, (10 - 0.3))
    ax.grid(axis='x', zorder=1)

    plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)

    wIMGFile = 'images/goea-bars/img-goea-bars-{celltype:s}-{species:s}-core-genes-{ns:s}.pdf'.format(celltype=celltype, species=species, ns=ns)
    print(wIMGFile)
    ensurePathExists(wIMGFile)
    plt.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
    plt.close()
FROM dw_interaction i WHERE i.age IS NOT NULL GROUP BY i.id_patient, i.age ) as t GROUP BY t.age """ dfi = pd.read_sql(sqli, con=engine, index_col='age') # Map age to age_group dfi['age_group'] = map_age_to_age_group(dfi.index) # Group by age_group dfi = dfi.groupby('age_group').agg({'patient-inter': 'sum'}) # Concat Results dfr = pd.concat([dfp, dfc, dfi], axis='columns', sort='False').fillna(0) # Relative Risk of CoAdministration (per gender) dfr['RRC^{g=F}'] = (dfr['patient-coadmin'] / dfr['patient']) / ( dfr.loc['Male', 'patient-coadmin'] / dfr.loc['Male', 'patient']) # Relative Risk of Interaction (per gender) dfr['RRI^{g=F}'] = (dfr['patient-inter'] / dfr['patient']) / ( dfr.loc['Male', 'patient-inter'] / dfr.loc['Male', 'patient']) print(dfr) # Export wCSVfile = 'results/age.csv' ensurePathExists(wCSVfile) dfr.to_csv(wCSVfile)
'matches': [] } for match in s.get_unique_matches(): for mid in match.id: mj['matches'].append({ 'id': mid, 'id_parent': dict_id_parent[mid], 'token': dict_token[mid], 'parent': dict_parent[mid], 'type': dict_type[mid] }) list_post_mentions.append(mj) print( 'nr_posts: {n_posts:d} | nr_matched_posts: {n_posts_with_matches:d}' .format(n_posts=n_posts, n_posts_with_matches=n_posts_with_matches)) if n_posts_with_matches <= 0: print('> NO MATCHED POSTS, SKIPPING') continue # to DataFrame dfR = pd.DataFrame(list_post_mentions) # Export wCSVfile = '../tmp-data/01-instagram-epilepsy-mentions-{dicttimestamp:s}.csv.gz'.format( dicttimestamp=dicttimestamp) utils.ensurePathExists(wCSVfile) dfR.to_csv(wCSVfile)
# Axis labels: the main axes show weight against edge rank on a log-scaled
# x axis.
ax.set_ylabel('Weight')
ax.set_xlabel('Edge rank')
ax.set_xscale('log')
# Second axes (axin): probability against weight, with smaller labels.
axin.set_ylabel('Probability', fontsize='small')
axin.set_xlabel('Weight', fontsize='small')
axin.set_xticks([0.2, 0.5, 1.0])
# Legend: one entry per handle (phs, pmm, pdm), which are created outside
# this excerpt.
ax.legend(handles=(phs, pmm, pdm), labels=('Human', 'Mouse', 'Insect'), loc='lower left')
# Grid
ax.grid(zorder=1)
axin.grid(zorder=1)
plt.subplots_adjust(left=0.12, right=0.97, bottom=0.12, top=0.92, wspace=0, hspace=0)
# Output goes to one directory per cell type; the file name carries the
# cell type and the plotted edge attribute.
img_path = 'images/net-edge-attributes/{celltype:s}/'.format(celltype=celltype)
file = img_path + 'img-net-{celltype:s}-full-edge-{attribute:s}-dist.pdf'.format(celltype=celltype, attribute=attribute)
ensurePathExists(file)
fig.savefig(file)
# Close the current figure once it has been written.
plt.close()
# Progress messages for the layer being processed.
print('Computing Backbone ({layer:s})'.format(layer=layer))
print('Create empty graph ({layer:s})'.format(layer=layer))
# B receives the nodes and edges of Gtmp; no attribute data is requested
# from Gtmp here. B is not referenced again in this excerpt.
B = nx.Graph()
B.add_nodes_from(Gtmp.nodes())
B.add_edges_from(Gtmp.edges())
#
# Compute Backbones
#
print('Dijkstra ({layer:s})'.format(layer=layer))
# compute_metric_backbone is defined elsewhere; it is called on Gtmp (not B)
# and returns two objects. NOTE(review): presumably two edge-keyed dicts,
# as the variable names suggest -- confirm against its definition.
dict_edges_backbone, dict_edges_s_values = compute_metric_backbone(Gtmp)
# To DataFrame: one column per returned object.
dfB = pd.DataFrame({'backbone': dict_edges_backbone, 's_values': dict_edges_s_values})
##
# Export
##
print('Exporting ({layer:s})'.format(layer=layer))
# The '.csv.gz' suffix makes pandas gzip-compress the file (to_csv infers
# compression from the extension by default).
wBfile = 'results/backbone/{celltype:s}/net-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-backbone.csv.gz'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
ensurePathExists(wBfile)
dfB.to_csv(wBfile)
def plot_distance_and_angles(celltype, network, threshold, layer, radius_window, radius_overlap, angle_window, angle_overlap):
    """Save, for components 1..9, a distance/angle scatter with entropy overlay.

    For each dimension ``d`` the figure shows the ``'{d}c-{d+1}c-dist'`` and
    ``'{d}c-{d+1}c-angle'`` columns of the "dian" table as a scatter, the
    normalised and smoothed entropy of the "entropy" table on a twin y axis,
    and one vertical line per cut point (entropy rows whose ``'cut-rank'`` is
    not null).

    Args:
        celltype, network, layer: Used to build the input and output paths.
        threshold: Converted to text with ``'.'`` replaced by ``'p'`` for the
            file names.
        radius_window, radius_overlap: Shown in the x label (``.2f`` format).
        angle_window, angle_overlap: Shown in the y label (``d`` format, so
            they must be integers).
    """
    threshold_str = str(threshold).replace('.', 'p')
    tags = dict(celltype=celltype, network=network, threshold=threshold_str, layer=layer)

    print('Plotting Distance & Angles for {celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**tags))
    rDiAnFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dian.csv.gz'.format(**tags)
    rEntrFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-entropy.csv.gz'.format(**tags)

    df_dian = pd.read_csv(rDiAnFile, index_col=0)
    df_ent = pd.read_csv(rEntrFile, index_col=0)
    has_cut = df_ent['cut-rank'].notnull()
    df_cp = df_ent.loc[has_cut, :].sort_values(['dim', 'cut-rank'])

    # One endless style iterator shared by every dimension: the cut-line
    # colour/linestyle sequence continues from one figure to the next.
    line_styles = cycler(color=['#1f77b4', '#ff7f0e', '#2ca02c']) + cycler(linestyle=['solid', 'dashed', 'dotted'])
    style_iter = line_styles()

    yticks = list(range(-180, 181, 45))
    yticklabels = [r'$-\pi$', r'$-\frac{3\pi}{4}$', r'$-\frac{\pi}{2}$', r'$-\frac{\pi}{4}$', r'$0$', r'$\frac{\pi}{4}$', r'$\frac{\pi}{2}$', r'$\frac{3\pi}{4}$', r'$\pi$']
    point_face = '#c7c7c7'
    point_edge = 'black'

    for dim in range(1, 10):
        print('- Dim: {:d}'.format(dim))
        entropy_rows = df_ent.loc[df_ent['dim'] == dim].copy()
        cut_rows = df_cp.loc[(df_cp['dim'] == dim), :]

        fig, ax = plt.subplots(figsize=(3.66, 3))
        axt = ax.twinx()

        cx = str(dim) + 'c'
        cy = str(dim + 1) + 'c'
        dist_label = '{cx:s}-{cy:s}-dist'.format(cx=cx, cy=cy)
        angle_label = '{cx:s}-{cy:s}-angle'.format(cx=cx, cy=cy)

        # The table is re-sorted in place of the previous ordering on every
        # pass, exactly as before.
        df_dian = df_dian.sort_values([dist_label, angle_label], ascending=[True, True])

        ax.scatter(df_dian[dist_label], df_dian[angle_label], c=point_face, marker='o', edgecolors=point_edge, lw=0.2, s=10, zorder=4, rasterized=True)
        radius_start = entropy_rows['radius-start']
        axt.plot(radius_start, entropy_rows['entropy-norm'], color='#d62728', zorder=6, marker='.', markersize=3, lw=0)
        axt.plot(entropy_rows['radius-start'], entropy_rows['entropy-smooth'], color='#ff9896', zorder=5)

        # Plot Cut Points
        for _idx, _cut_rank, radius in cut_rows[['cut-rank', 'radius-start']].to_records():
            ax.axvline(x=radius, zorder=6, **next(style_iter))

        ax.set_title('Components {dim1} and {dim2}'.format(dim1=dim, dim2=(dim + 1)))
        ax.set_xlabel(r'radius ($\theta_w = {radius_window:.2f}, \theta_o = {radius_overlap:.2f}$)'.format(radius_window=radius_window, radius_overlap=radius_overlap))
        ax.set_ylabel(r'angle ($\varphi_w = {angle_window:d}, \varphi_o = {angle_overlap:d}$)'.format(angle_window=angle_window, angle_overlap=angle_overlap))

        # Angle ticks every 45 degrees, labelled in multiples of pi.
        ax.set_yticks(yticks)
        ax.set_yticklabels(yticklabels, fontsize='medium')

        axt.set_ylabel('entropy (normed)')
        axt.set_ylim(0, 1)
        ax.grid()

        plt.subplots_adjust(left=0.17, right=0.84, bottom=0.17, top=0.89)

        wIMGFile = 'images/pca-entropy/{celltype:s}/{layer:s}/img-entropy-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-C{dimx:d}x{dimy:d}.pdf'.format(dimx=dim, dimy=(dim + 1), **tags)
        ensurePathExists(wIMGFile)
        plt.savefig(wIMGFile, dpi=300)
        plt.close()
layer=layer)) rPCAFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dim.csv.gz'.format( celltype=celltype, network=network, threshold=threshold_str, layer=layer) wDiAnFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dian.csv.gz'.format( celltype=celltype, network=network, threshold=threshold_str, layer=layer) wEntrFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-entropy.csv.gz'.format( celltype=celltype, network=network, threshold=threshold_str, layer=layer) # df_pca = pd.read_csv(rPCAFile, index_col=0, encoding='utf-8') # df_ent, df_dian = compute_entropy(df_pca, radius_window=radius_window, radius_overlap=radius_overlap, angle_window=angle_window, angle_overlap=angle_overlap, components=9) # ensurePathExists(wDiAnFile) ensurePathExists(wEntrFile) df_ent.to_csv(wEntrFile) df_dian.to_csv(wDiAnFile)
def plot_goea(celltype='spermatocyte', network='thr', threshold=0.5, layer='DM', modules=None):
    """Save one GO-enrichment bar chart per module.

    Reads the GOEA table of the given cell type / network / threshold /
    layer, keeps terms with ``depth >= 5`` and, for every module, plots the
    ten terms with the largest ``1 - log(p_fdr_bh)`` as horizontal bars
    annotated with the term name (cut to 75 characters).

    Args:
        celltype, network, layer: Used to build the input and output paths;
            ``layer`` is also a key into the module-level ``dict_specie``.
        threshold: Converted to text with ``'.'`` replaced by ``'p'`` for the
            file names, as the sibling plotting functions do.
        modules: Iterable of dicts with the keys ``'id'``, ``'name'`` and
            ``'facecolor'``. ``None`` (the default) means no modules.

    Raises:
        KeyError: If ``layer`` is not in ``dict_specie`` or a module name is
            not in ``dict_replace``.
    """
    # Fresh list per call instead of a shared mutable default.
    modules = [] if modules is None else modules
    # Previously this name was read without being defined in the function,
    # so the call only worked if a matching global happened to exist.
    threshold_str = str(threshold).replace('.', 'p')

    rCSVFile = 'results/goea/{celltype:s}/goea-{celltype:s}-{network:s}-{threshold:s}-{layer:s}.csv.gz'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
    df = pd.read_csv(rCSVFile)
    # Trim; the copy makes the assignments below act on an independent frame.
    df = df.loc[(df['depth'] >= 5), :].copy()
    # All zeros are set to the smallest computable float so log() is finite.
    df.loc[df['p_fdr_bh'] == 0.0, 'p_fdr_bh'] = np.nextafter(0, 1)
    df['1-log(p)'] = 1 - (np.log(df['p_fdr_bh']))

    print('Plotting GOEA Bars: {celltype:s} - {network:s} - {threshold:s} - {layer}'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer))
    specie = dict_specie[layer]

    sl = 75  # string slice

    for module in modules:
        mid = module['id']
        mname = module['name']
        print("Module: M{mid:d}-{mname:s}".format(mid=mid, mname=mname))
        facecolor = module['facecolor']

        dft = df.loc[(df['module-id'] == mid), :].sort_values('1-log(p)', ascending=False)
        # Ten strongest terms, ascending so the strongest bar is on top.
        dft10 = dft.iloc[:10, :].sort_values('1-log(p)', ascending=True)
        if len(dft10) == 0:
            print('No significant GOs.')
            continue
        # Shortened labels are kept in a list; dft10 itself is not modified.
        names = [x[0:sl] + '..' if len(x) > sl else x for x in dft10['name'].tolist()]

        # Plot
        fig, ax = plt.subplots(figsize=(4.7, 3.0))
        title = 'GOEA-{specie:s} {celltype:s} M{mid:d}-{mname:s}'.format(specie=specie, celltype=celltype, mid=mid, mname=dict_replace[mname])
        ind = np.arange(0, len(dft10), 1)
        bp = ax.barh(ind, 1 - np.log(dft10['p_fdr_bh']), height=0.8, facecolor=facecolor, zorder=4)
        ax.set_title(title, fontsize='large')

        # Term names are written inside the bars, slightly right of their start.
        maxx = ax.get_xlim()[1]
        for bar, name in zip(bp.patches, names):
            tx = bar.get_x() + (0.01 * maxx)
            ty = (bar.get_y() + (bar.get_height() / 2))
            ax.text(x=tx, y=ty, s=name, ha='left', va='center', fontsize='x-small', zorder=5)

        # Reference line at p = 0.05.
        ax.axvline(x=(1 - math.log(0.05)), color='#c7c7c7', ls='dashed')

        ax.set_yticks(ind)
        ax.set_yticklabels(dft10['GO'])
        ax.set_xlabel(r'$1 - $log($p$-value)')
        # Fixed room for ten bars, however many are actually drawn.
        ax.set_ylim(-0.7, (10 - 0.3))
        ax.grid(axis='x', zorder=1)

        plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)

        wIMGFile = 'images/goea-bars/{celltype:s}/{layer:s}/img-goea-bars-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-M{mid:d}.pdf'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer, mid=mid)
        ensurePathExists(wIMGFile)
        plt.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
        plt.close()
def _goea_word_frequencies(dfA, stopword_set):
    """Return ``{word: score}`` for the words of the GO term names in ``dfA``.

    Each name is split on spaces after replacing ``-`` with a space and
    removing ``,``, ``.`` and ``'``. Per word, the number of occurrences and
    the sum of the ``'1-log(p)'`` values of the terms it occurs in are
    ranked (``method='min'``); the score is the product of the two ranks.
    Returns an empty dict when no word is left.
    """
    strip_table = str.maketrans({'-': ' ', ',': None, '.': None, "'": None})
    rows = []
    for name, pvalue in zip(dfA['name'], dfA['1-log(p)']):
        for word in name.translate(strip_table).split(' '):
            # Consecutive spaces produce empty tokens; they are not words.
            if word and word not in stopword_set:
                rows.append((word, pvalue))
    if not rows:
        return {}
    dfr = pd.DataFrame(rows, columns=['name', 'pvalue'])
    # Merge the two spellings into one entry.
    dfr['name'] = dfr['name'].replace('proteasomal', 'proteasome')
    dfrg = dfr.groupby('name').agg({'pvalue': ['count', 'sum']})
    dfrg.columns = dfrg.columns.droplevel()
    dfrg['frequency'] = dfrg['count'].rank(method='min') * dfrg['sum'].rank(method='min')
    dfrg = dfrg.sort_values('frequency', ascending=False)
    return dfrg['frequency'].to_dict()


def plot_wordcloud(celltype='spermatocyte', network='thr', threshold=0.5, layer='DM', modules=None):
    """Save one GO-term word cloud per module.

    Reads the GOEA table of the given cell type / network / threshold /
    layer, keeps terms with ``depth >= 5`` and, for every module, renders a
    word cloud of the words in its term names, sized by the score computed
    in ``_goea_word_frequencies``. Words listed in the module-level
    ``data_text_color[mid]`` are drawn in the module colour, all others in
    black.

    Args:
        celltype, network, layer: Used to build the input and output paths;
            ``layer`` is also a key into the module-level ``dict_specie``.
        threshold: Converted to text with ``'.'`` replaced by ``'p'`` for the
            file names, as the sibling plotting functions do.
        modules: Iterable of dicts with the keys ``'id'``, ``'name'`` and
            ``'facecolor'``. ``None`` (the default) means no modules.
    """
    # Fresh list per call instead of a shared mutable default.
    modules = [] if modules is None else modules
    celltype_str = celltype.title()
    # Previously this name was read without being defined in the function,
    # so the call only worked if a matching global happened to exist.
    threshold_str = str(threshold).replace('.', 'p')

    rCSVFile = 'results/goea/{celltype:s}/goea-{celltype:s}-{network:s}-{threshold:s}-{layer:s}.csv.gz'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
    df = pd.read_csv(rCSVFile)
    # Trim; the copy makes the assignments below act on an independent frame.
    df = df.loc[(df['depth'] >= 5), :].copy()
    # All zeros are set to the smallest computable float so log() is finite.
    df.loc[df['p_fdr_bh'] == 0.0, 'p_fdr_bh'] = np.nextafter(0, 1)
    df['1-log(p)'] = 1 - (np.log(df['p_fdr_bh']))
    specie = dict_specie[layer]

    english_stopwords = stopwords.words('english')
    # Set for the per-word membership tests; WordCloud still gets the list.
    stopword_set = set(english_stopwords)

    print('Plotting GOEA Wordcloud: {celltype:s} - {network:s} - {threshold:s} - {layer}'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer))

    for module in modules:
        mid = module['id']
        mname = module['name']
        text_color = module['facecolor']
        print("Module: M{mid:d}-{mname:s}".format(mid=mid, mname=mname))

        dft = df.loc[(df['module-id'] == mid), :]
        frequencies = _goea_word_frequencies(dft, stopword_set)
        # Covers modules without terms as well as modules whose term names
        # contain nothing but stopwords; WordCloud cannot draw an empty table.
        if not frequencies:
            print('No significant GOs.')
            continue

        fig, ax = plt.subplots(figsize=(4.0, 3.0))

        def initial_color_func(*args, **kwargs):
            # Placeholder colour for layout; replaced by recolor() below.
            return (0, 0, 0)

        wordcloud = WordCloud(background_color='white', max_font_size=45, width=400, height=300, stopwords=english_stopwords, relative_scaling='auto', colormap='tab10', color_func=initial_color_func, collocation_threshold=20)
        wordcloud.generate_from_frequencies(frequencies)

        def highlight_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
            # Used immediately within this iteration, so capturing the loop
            # variables mid and text_color is safe.
            if word in data_text_color[mid]:
                return text_color
            return 'black'

        # Recolor
        wordcloud.recolor(color_func=highlight_color_func)

        title = 'GOEA-{specie:s} {celltype:s} M{mid:d}-{mname:s}'.format(specie=specie, celltype=celltype_str, mid=mid, mname=dict_replace[mname])
        ax.set_title(title)
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.set_xticks([])
        ax.set_yticks([])

        plt.subplots_adjust(left=0.03, right=0.97, bottom=0.17, top=0.89)

        wIMGFile = 'images/goea-wordcloud/{celltype:s}/{layer:s}/img-goea-wc-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-mod-{mid:d}.pdf'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer, mid=mid)
        ensurePathExists(wIMGFile)
        plt.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
        plt.close()
# json print('> json') jsondata = { 'directed': False, 'graph': [], 'nodes': [{ 'id': i, **d } for i, d in Gtc.nodes(data=True)], 'edges': [{ 'from': i, 'to': j, **d } for i, j, d in Gtc.edges(data=True)] } wGtcfile_json = 'results/json/net_{network:s}-{level:s}-{layer:s}-SVD-{component:s}.json'.format( network=network, level=levelstr, layer=layer, component=component) ensurePathExists(wGtcfile_json) with open(wGtcfile_json, 'w') as outfile: json.dump(jsondata, outfile, indent=4) """ # graphml print('> graphml') wGtcfile_graphml = 'results/graphml/net_{network:s}-{layer:s}-SVD-{component:s}.graphml'.format(network=network, layer=layerstr, component=component) ensurePathExists(wGtcfile_graphml) nx.write_graphml(Gtc, wDMGfile_graphml) """
def plot_indianapolis_map(gdf=None, var=None, vmin=None, vmax=None, cmap=None, title='', legend_label='', legend_format=None, wIMGfile=None):
    """Save a choropleth of ``gdf[var]`` with boundary and highway overlays.

    The map is clipped to the bounds of the module-level ``zoom_polygon``
    (whose outline is drawn in green) and overlaid with the module-level
    layers ``gdp1`` (boundaries plus text labels), ``gcounties`` and
    ``ghighways``. Axis tick values are shown divided by 1e4.

    Args:
        gdf: Frame whose ``.plot`` draws the coloured layer; required
            despite the ``None`` default.
        var: Column of ``gdf`` to colour by.
        vmin, vmax: Colour scale limits.
        cmap: Colour map.
        title: Axes title.
        legend_label: Label of the horizontal colour bar.
        legend_format: Tick format of the colour bar.
        wIMGfile: Output path; required despite the ``None`` default.
    """
    fig, ax = plt.subplots(figsize=(6, 6), nrows=1, ncols=1)
    # Thin dedicated axes at the bottom of the figure for the colour bar.
    cax = fig.add_axes([0.15, 0.06, 0.70, 0.021])
    ax.set_title(title)

    # Variable (an earlier, disabled "patients" layer used to sit here).
    colorbar_options = {
        'label': legend_label,
        'orientation': 'horizontal',
        'format': legend_format,
    }
    gdf.plot(column=var, cmap=cmap, ax=ax, lw=0, legend=True, legend_kwds=colorbar_options, vmin=vmin, vmax=vmax, cax=cax, zorder=3)

    # Overlays: gdp1 boundaries, counties, highways.
    gdp1.boundary.plot(ax=ax, lw=1, edgecolor='#c7c7c7', zorder=4)
    gcounties.boundary.plot(ax=ax, lw=1.5, color='#d62728', zorder=8)
    ghighways.plot(ax=ax, lw=1, color='#7f7f7f', zorder=7)

    # Names: label only the geometries whose representative point lies
    # inside the zoom polygon.
    def label_geometry(row):
        anchor = row['geometry'].representative_point()
        if not zoom_polygon.contains(anchor):
            return
        ax.text(x=anchor.x, y=anchor.y, s=row.get('ZCTA5CE10', ''), ha='center', fontsize='xx-small', zorder=12)

    gdp1.apply(label_geometry, axis=1)

    def xy_format(x, pos):
        # Tick values are displayed in units of 1e4.
        return "{x:.0f}".format(x=(x / 1e4))

    ax.plot(*zoom_polygon.exterior.xy, color='green', lw=1)

    # Axis Label
    ax.xaxis.set_major_formatter(FuncFormatter(xy_format))
    ax.yaxis.set_major_formatter(FuncFormatter(xy_format))

    # Zoom in to the polygon's bounding box.
    west, south, east, north = zoom_polygon.bounds
    ax.set_xlim(west, east)
    ax.set_ylim(south, north)

    # Save
    plt.subplots_adjust(left=0.10, right=0.95, bottom=0.14, top=0.93, wspace=0.0, hspace=0.0)
    ensurePathExists(wIMGfile)
    fig.savefig(wIMGfile)
    plt.close()
# Generate dfX dfNet = pd.DataFrame.from_dict(dict(Gtc.nodes(data=True)), orient='index') # Merge DataFrames dfX = pd.concat([dfNet, dfSVD], axis='columns') # Calculate y """ def calc_y(r): if r.get('mean-fert-rate', 1.0) < 0.7: return True elif not pd.isnull(r.get('known-DM-phenotype', None)): return True elif not pd.isnull(r.get('new-DM-phenotype', None)): return True else: return False dfX['y'] = dfX.apply(calc_y, axis='columns') """ ## # Export ## print('Saving results to .CSV') wMLFile = 'results/ml/{celltype:s}/{layer:s}/ml-{celltype:s}-{layer:s}-mod-{mid:d}.csv.gz'.format( celltype=celltype, layer=layer, mid=mid) ensurePathExists(wMLFile) dfX.to_csv(wMLFile)
# columns = ['{:d}c'.format(i) for i in range(1, components + 1)] df_pca = pd.DataFrame(res[:, 0:components], columns=columns, index=dfG.index) df_pca = pd.concat([dfG, df_pca], axis='columns') # s_pca_var = pd.Series(pca.explained_variance_ratio_, index=range(1, (res.shape[1] + 1)), name='explained_variance_ratio') print('Saving results to .CSV') wPCAFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dim.csv.gz'.format( celltype=celltype, network=network, threshold=threshold_str, layer=layer) wSFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-s.csv.gz'.format( celltype=celltype, network=network, threshold=threshold_str, layer=layer) # ensurePathExists(wPCAFile) ensurePathExists(wSFile) # df_pca.to_csv(wPCAFile) s_pca_var.to_csv(wSFile, header=True) print('Done.')
nx.set_node_attributes(Gt, values=dict_conserved, name='conserved') if add_core: rCOREFile = '../../02-core_genes/results/pipeline-core/{layer:s}_meiotic_genes.csv'.format( layer=layer) dfC = pd.read_csv(rCOREFile, index_col=0) dict_core = {gene: True for gene in dfC.index.tolist()} # nx.set_node_attributes(Gt, values=dict_core, name='core') # Remove Isolates if remove_isolates: isolates = list(nx.isolates(Gt)) print('Removing {n:d} isolated nodes'.format(n=len(isolates))) Gt.remove_nodes_from(isolates) # Largest Connected Component if only_largest_component: Gt = get_network_largest_connected_component(Gt) # graphml print('Export to graphml') if network == 'thr': wGtfile_graphml = '../gephi-plotting/results/graphml/{celltype:s}/net-{celltype:s}-{network:s}-{threshold:s}-{layer:s}.graphml'.format( celltype=celltype, network=network, threshold=threshold_str, layer=layer) ensurePathExists(wGtfile_graphml) nx.write_graphml(Gt, wGtfile_graphml)
# DM gene table, indexed by 'id_string'; only the id and gene-name columns
# are loaded.
df_DM = pd.read_csv('results/{pipeline:s}/DM_meiotic_genes.csv'.format(pipeline=pipeline), index_col='id_string', usecols=['id_gene', 'id_string', 'gene'])


def map_multiple_ids(x, d):
    """Translate a comma-separated id string through the mapping ``d``.

    ``x`` is split on ``','``, each piece is looked up in ``d`` and the
    results are joined with ``','`` again, in the same order. A piece that
    is not a key of ``d`` raises ``KeyError``; the looked-up values must be
    strings for the join to succeed.
    """
    x = x.split(',')
    return ','.join([d[i] for i in x])


# Per species: map the 'id_string_*' column to gene ids and to gene names,
# using the species table (indexed by id_string) as the lookup.
df['id_gene_HS'] = df['id_string_HS'].apply(map_multiple_ids, args=(df_HS['id_gene'].to_dict(), ))
df['id_gene_MM'] = df['id_string_MM'].apply(map_multiple_ids, args=(df_MM['id_gene'].to_dict(), ))
df['id_gene_DM'] = df['id_string_DM'].apply(map_multiple_ids, args=(df_DM['id_gene'].to_dict(), ))
df['gene_HS'] = df['id_string_HS'].apply(map_multiple_ids, args=(df_HS['gene'].to_dict(), ))
df['gene_MM'] = df['id_string_MM'].apply(map_multiple_ids, args=(df_MM['gene'].to_dict(), ))
df['gene_DM'] = df['id_string_DM'].apply(map_multiple_ids, args=(df_DM['gene'].to_dict(), ))
print("> Exporting")
wCSVFile = 'results/{pipeline:s}/meta_meiotic_genes_4Paulo.csv'.format(pipeline=pipeline)
ensurePathExists(wCSVFile)
df.to_csv(wCSVFile)
print('done.')
]) # first9 = first9 + others rects_first9 = ax.bar(ind, first9, width, bottom=others, color='#636363', edgecolor='#969696', lw=1, zorder=9) rects_others = ax.bar(ind, others, width, bottom=0, color='#bdbdbd', edgecolor='#d9d9d9', lw=1, zorder=9) bar_labels(ax=ax, rects=rects_first9) ax.set_xticks(ind) ax.set_xticklabels(['Human', 'Mouse', 'Insect'], fontsize='small') ax.set_ylabel('Variance') ax.set_xlim(-0.4, 2.7) plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89) ensurePathExists(wIMGFile) plt.savefig(wIMGFile, dpi=150, bbox_inches=None, pad_inches=0.0) plt.close()
def plot_pca(celltype='spermatocyte', network='thr', threshold=0.5, layer='DM', modules=None):
    """Plot the PCA variance bars and the consecutive-component projections.

    Reads the pre-computed PCA tables of one network (the '-dim', '-dian',
    '-entropy' and '-s' CSV files under ``../../04-network/results/pca``) and
    writes PDF figures under ``images/pca-entropy/<celltype>/<layer>/``:

    * one '-variance' bar chart with the first nine values of the '-s' table;
    * one '-C<i>x<i+1>' scatter plot for every consecutive component pair
      (1x2 up to 8x9), overlaid with the cut-radius circles found in the
      '-entropy' table and with the rectangular regions of ``modules``.

    Args:
        celltype: Cell type name; only used to build the file paths.
        network: Network type name; only used to build the file paths.
        threshold: Network threshold; only used to build the file paths
            (its '.' is replaced by 'p').
        layer: Species key, one of 'HS', 'MM' or 'DM'; used in the file paths
            and as the title of the variance figure.
        modules: Optional list of module dicts. Each one must provide 'id',
            'name' and 'dim-coords' (with 'xdim', 'ydim', 'xvals', 'yvals' and
            'radius-rank'), and may provide 'facecolor', 'edgecolor', 'hatch'
            and 'name-loc' ('upper left', 'upper right', 'lower left' or
            'lower right'). ``None`` (the default) draws no module.

    Raises:
        KeyError: If ``layer`` is not 'HS', 'MM' or 'DM', or if a module refers
            to a 'radius-rank' with no circle in its component plane.
        ValueError: If a module has an unrecognised 'name-loc'.
    """
    # ``None`` stands in for the former mutable ``[]`` default.
    modules = [] if modules is None else modules
    threshold_str = str(threshold).replace('.', 'p')
    fmt = dict(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
    #
    print('Plotting PCA for {celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**fmt))

    # The four input tables share one path stem and differ only in the suffix.
    rStem = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**fmt)
    rPCAFile = rStem + '-dim.csv.gz'
    rDiAnFile = rStem + '-dian.csv.gz'
    rEntFile = rStem + '-entropy.csv.gz'
    rSFile = rStem + '-s.csv.gz'
    # Same idea for the output figures.
    wStem = 'images/pca-entropy/{celltype:s}/{layer:s}/img-pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**fmt)

    df_pca = pd.read_csv(rPCAFile, index_col=0)
    df_dian = pd.read_csv(rDiAnFile, index_col=0)
    df_ent = pd.read_csv(rEntFile, index_col=0)
    # ``read_csv(squeeze=True)`` was removed in pandas 2.0; squeezing the
    # columns of the resulting frame is the documented replacement.
    s = pd.read_csv(rSFile, index_col=0, header=0, encoding='utf-8').squeeze('columns')
    # Only the entropy rows that carry a cut rank define a circle.
    df_cp = df_ent.loc[df_ent['cut-rank'].notnull(), :].sort_values(['dim', 'cut-rank'])
    # Endless style cycle for the circles. It is created once, so the styles
    # keep cycling across figures instead of restarting on every plane.
    cyc = (cycler(edgecolor=['#1f77b4', '#ff7f0e', '#2ca02c']) + cycler(linestyle=['solid', 'dashed', 'dotted']))()

    #
    # Plot Variance
    #
    s_cumsum = s.cumsum()
    # NOTE(review): this counts the vectors whose cumulative sum is still
    # *below* 0.95, so reaching 95% may need one more vector -- confirm.
    n_eigen_95 = s_cumsum[(s_cumsum < 0.95)].shape[0]
    n_bars = 9
    ind = np.arange(n_bars)
    height = s.iloc[:n_bars].values
    width = 0.60
    species_name = {'HS': 'Human', 'MM': 'Mouse', 'DM': 'Insect'}

    fig, ax = plt.subplots(figsize=(3, 3))
    ax.bar(ind, height, width, color='#636363', edgecolor='#969696', zorder=9, lw=1)
    ax.set_xticks(ind)
    ax.set_xticklabels(ind + 1)  # components are labelled from 1
    ax.set_title('{species:s}'.format(species=species_name[layer]))
    ax.annotate('95% with {:,d}\nsingular vectors'.format(n_eigen_95),
                xy=(0.97, 0.97), xycoords="axes fraction",
                ha='right', va='top', fontsize='small')
    ax.set_xlabel('Components')
    ax.set_ylabel('Variance')
    ax.grid(axis='y')
    plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)

    wIMGFile = wStem + '-variance.pdf'
    ensurePathExists(wIMGFile)
    plt.savefig(wIMGFile, dpi=150, bbox_inches=None, pad_inches=0.0)
    plt.close()

    #
    # Plot Projections
    #
    for dim in range(1, 9):
        print('- Dim: {dim:d}'.format(dim=dim))
        # Column labels of the two components shown in this figure
        x = str(dim) + 'c'
        y = str(dim + 1) + 'c'
        xs = df_pca[x].tolist()
        ys = df_pca[y].tolist()

        fig, ax = plt.subplots(figsize=(3, 3))
        ax.scatter(xs, ys, c='#c7c7c7', marker='o', edgecolor='black', lw=0.2, s=10, zorder=5, rasterized=True)
        # Draw a X at the center
        ax.plot(0, 0, color='#2ca02c', marker='x', ms=16)
        # Draw lines at the center
        ax.axhline(y=0, c='black', lw=0.75, ls='-.', zorder=2)
        ax.axvline(x=0, c='black', lw=0.75, ls='-.', zorder=2)

        ax.set_title('Components {dim1} and {dim2}'.format(dim1=dim, dim2=(dim + 1)))
        ax.set_xlabel('Component {dim1:d}'.format(dim1=dim))
        ax.set_ylabel('Component {dim2:d}'.format(dim2=dim + 1))
        ax.grid()

        # Vertical offsets for the module labels, derived from the y-limits
        # of the bare scatter (before any module widens them).
        ylimmin, ylimmax = ax.get_ylim()
        ylimdiff = abs(ylimmax) + abs(ylimmin)
        yperc = 0.035 * ylimdiff
        yspac = 0.8

        # Radius Circles: one per cut rank of this component
        df_cp_tmp = df_cp.loc[(df_cp['dim'] == dim), :]
        sg_circles = {}
        for cut_rank, radius in zip(df_cp_tmp['cut-rank'], df_cp_tmp['radius-start']):
            # Shapely Circle, kept to carve it out of the module boxes below
            sg_circles[cut_rank] = sg.Point(0, 0).buffer(radius)
            # Mpl Circle
            props = next(cyc)
            ax.add_patch(mp.Circle((0, 0), radius=radius, facecolor='none', zorder=6, **props))

        # Draw Component
        for module in modules:
            coords = module['dim-coords']
            xc = coords['xdim']
            yc = coords['ydim']
            # Only the modules defined on this exact component plane are drawn.
            if (dim != xc) or ((dim + 1) != yc):
                continue

            mid = module['id']
            # Some modules are renamed for display (e.g., DM-M12).
            mname = dict_replace.get(module['name'], module['name'])
            #
            cx = '{xc:d}c'.format(xc=xc)  # label 1 component
            cy = '{yc:d}c'.format(yc=yc)  # label 2 component
            cxy = '{xc:d}c-{yc:d}c-dist'.format(xc=xc, yc=yc)  # label-1c-2c-dist
            #
            facecolor = module.get('facecolor', 'black')
            edgecolor = module.get('edgecolor', 'none')
            hatch = module.get('hatch', None)
            cxl, cxh = coords['xvals']
            cyl, cyh = coords['yvals']
            cut_rank = coords['radius-rank']
            sg_circle = sg_circles[cut_rank]

            # Validate the label position up front. Previously an unknown
            # value left ytext/xtext unbound or reused the previous module's.
            name_loc = module.get('name-loc', 'upper left')
            name_loc_upper_lower, name_loc_left_right = name_loc.split(' ')
            if name_loc_upper_lower not in ('upper', 'lower') or name_loc_left_right not in ('left', 'right'):
                raise ValueError("Invalid 'name-loc' for module {mid!r}: {loc!r}".format(mid=mid, loc=name_loc))

            # Radius of the circle
            cut_radius = df_ent.loc[((df_ent['dim'] == xc) & (df_ent['cut-rank'] == cut_rank)), 'radius-start'].squeeze()
            # Select points in module: inside the box and outside the circle
            in_module = (
                (df_pca[cx] >= cxl) & (df_pca[cx] <= cxh) &
                (df_pca[cy] >= cyl) & (df_pca[cy] <= cyh) &
                (df_dian[cxy] >= cut_radius))
            n_genes = df_pca.loc[in_module, :].shape[0]
            name = "M{mid:d}-{mname:s} (n={n:,d})".format(mid=mid, mname=mname, n=n_genes)

            # name loc (vertical): just above or just below the box
            if name_loc_upper_lower == 'upper':
                ytext = cyh + yperc
                near_edge = abs(ylimmax - ytext) < 0.5
            else:
                ytext = cyl - yperc
                near_edge = abs(ylimmin - ytext) < 0.5
            # add some space to the y-limits when the label would touch them
            if near_edge:
                ax.set_ylim((ylimmin - yspac, ylimmax + yspac))
            # name loc (horizontal): aligned with the box side
            if name_loc_left_right == 'left':
                xtext, ha = cxl, 'left'
            else:
                xtext, ha = cxh, 'right'

            # Module region = bounding box minus the cut-radius disc
            sg_diff = sg.box(cxl, cyl, cxh, cyh).difference(sg_circle)
            ax.add_patch(descartes.PolygonPatch(
                sg_diff, facecolor=facecolor, edgecolor=edgecolor,
                lw=1, zorder=2, alpha=1, hatch=hatch))
            ax.annotate(text=name, xy=(xtext, ytext), fontsize='x-small',
                        zorder=8, fontweight='bold', ha=ha, va='center')

        plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)
        wIMGFile = wStem + '-C{dimx:d}x{dimy:d}.pdf'.format(dimx=dim, dimy=(dim + 1))
        ensurePathExists(wIMGFile)
        plt.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
        plt.close()
'tau': row['tau-norm'], 'tau_scaler': row['scaler(tau-norm)'], # 'color': edge_color_hex, 'gender': gender }) G_patient = G.copy() G_tau = G.copy() # # Set weight # nx.set_edge_attributes(G_patient, nx.get_edge_attributes(G, 'patient_scaler'), 'weight') nx.set_edge_attributes(G_tau, nx.get_edge_attributes(G, 'tau_scaler'), 'weight') # # Export # wGtauFile = 'results/ddi_network_tau.gpickle' ensurePathExists(wGtauFile) nx.write_gpickle(G_tau, wGtauFile) nx.write_graphml(G_tau, 'results/ddi_network_tau.graphml') # wGpatientFile = 'results/ddi_network_patient.gpickle' ensurePathExists(wGpatientFile) nx.write_gpickle(G_patient, wGpatientFile)
gdp1.boundary.plot(ax=ax, lw=0.5, edgecolor='#c7c7c7', zorder=4) # Counties gcounties.boundary.plot(ax=ax, lw=0.75, color='#d62728', zorder=8) # Highways ghighways.plot(ax=ax, lw=0.8, color='#7f7f7f', zorder=7) def xy_format(x, pos): return "{x:.0f}".format(x=(x / 1e4)) # Axis Label y_formatter = FuncFormatter(xy_format) x_formatter = FuncFormatter(xy_format) ax.xaxis.set_major_formatter(x_formatter) ax.yaxis.set_major_formatter(y_formatter) # Plot Zoom Polygon ax.plot(*zoom_polygon.exterior.xy, color='green', lw=1.5, zorder=10) # Zoom in Indiana minx, miny, maxx, maxy = gdp1.total_bounds #minx, miny, maxx, maxy = zoom_polygon.bounds ax.set_xlim(minx, maxx) ax.set_ylim(miny, maxy) # Save plt.subplots_adjust(left=0.04, right=0.98, bottom=0.07, top=0.95, wspace=0.0, hspace=0.0) wIMGfile = 'images/img-indianapolis.pdf' ensurePathExists(wIMGfile) fig.savefig(wIMGfile) plt.close()
def export_genes(celltype='spermatocyte', network='thr', threshold=0.5, layer='DM', modules=None):
    """Export the genes that fall inside each PCA module to a CSV file.

    Reads the '-dim', '-dian' and '-entropy' PCA tables of one network from
    ``../../04-network/results/pca`` and, for every module, keeps the rows of
    the '-dim' table that lie inside the module's box on its two components
    and at or beyond the module's cut radius. The selected rows of all modules
    are stacked, tagged with 'module-id' and 'module-name' columns, and written
    to ``results/pca-entropy/<celltype>/<layer>/...-modules.csv.gz``.

    The box and radius bounds are inclusive, the same bounds ``plot_pca`` uses
    when it counts the genes shown in each module label.

    Args:
        celltype: Cell type name; only used to build the file paths.
        network: Network type name; only used to build the file paths.
        threshold: Network threshold; only used to build the file paths
            (its '.' is replaced by 'p').
        layer: Species/layer key; only used to build the file paths.
        modules: Optional list of module dicts, each providing 'id', 'name'
            and 'dim-coords' (with 'xdim', 'ydim', 'xvals', 'yvals' and
            'radius-rank'). ``None`` or an empty list exports an empty table
            that still carries all the columns.

    Raises:
        ValueError: If a module's ('xdim', 'radius-rank') pair does not match
            exactly one row of the '-entropy' table.
    """
    # ``None`` stands in for the former mutable ``[]`` default.
    modules = [] if modules is None else modules
    threshold_str = str(threshold).replace('.', 'p')
    fmt = dict(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
    #
    print('Exporting genes. PCA modules of {celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**fmt))

    # The three input tables share one path stem and differ only in the suffix.
    rStem = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**fmt)
    rPCAFile = rStem + '-dim.csv.gz'
    rDiAnFile = rStem + '-dian.csv.gz'
    rEntFile = rStem + '-entropy.csv.gz'
    #
    wCSVFile = 'results/pca-entropy/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-modules.csv.gz'.format(**fmt)

    df_pca = pd.read_csv(rPCAFile, index_col=0)
    df_dian = pd.read_csv(rDiAnFile, index_col=0)
    df_ent = pd.read_csv(rEntFile, index_col=0)

    ldfS = []
    for module in modules:
        mid = module['id']
        mname = module['name']
        print("Computing Module {mid:d}-{mname:s}".format(mid=mid, mname=mname))
        #
        coords = module['dim-coords']
        xc = coords['xdim']
        yc = coords['ydim']
        ld1 = '{xc:d}c'.format(xc=xc)  # label 1 component
        ld2 = '{yc:d}c'.format(yc=yc)  # label 2 component
        l12d = '{xc:d}c-{yc:d}c-dist'.format(xc=xc, yc=yc)  # label-1c-2c-dist
        x0, x1 = coords['xvals']
        y0, y1 = coords['yvals']
        cut_rank = coords['radius-rank']

        # Radius of the circle. Exactly one entropy row must match; otherwise
        # the comparison below would fail with an unhelpful pandas error.
        radius_rows = df_ent.loc[((df_ent['dim'] == xc) & (df_ent['cut-rank'] == cut_rank)), 'radius-start']
        if len(radius_rows) != 1:
            raise ValueError(
                "Expected exactly one 'radius-start' for dim={dim!r} and cut-rank={rank!r}, found {n:d}".format(
                    dim=xc, rank=cut_rank, n=len(radius_rows)))
        cut_radius = radius_rows.squeeze()

        # Select genes in module: inside the box and at/beyond the cut radius
        in_module = (
            (df_pca[ld1] >= x0) & (df_pca[ld1] <= x1) &
            (df_pca[ld2] >= y0) & (df_pca[ld2] <= y1) &
            (df_dian[l12d] >= cut_radius))
        df_pca_tmp = df_pca.loc[in_module, :].copy()
        #
        df_pca_tmp['module-id'] = mid
        df_pca_tmp['module-name'] = mname
        ldfS.append(df_pca_tmp)

    if ldfS:
        dfR = pd.concat(ldfS, axis=0)
    else:
        # ``pd.concat`` raises on an empty list; export an empty table that
        # still carries every column so downstream readers keep working.
        dfR = df_pca.iloc[:0].copy()
        dfR['module-id'] = pd.Series(dtype='int64')
        dfR['module-name'] = pd.Series(dtype='object')

    # Export
    print("Exporting")
    ensurePathExists(wCSVFile)
    dfR.to_csv(wCSVFile)