def _object_vector(items):
    """Pack *items* into a 1-D numpy object array, one element per item.

    ``np.array(list_of_arrays, dtype=object)`` stacks the inner arrays into a
    2-D array whenever they all happen to have the same length. Filling a
    pre-allocated object array element by element always gives a 1-D array
    whose elements are the original objects.
    """
    items = list(items)
    vec = np.empty(len(items), dtype=object)
    for pos, item in enumerate(items):
        vec[pos] = item
    return vec


def export_pathways():
    """Export all pathways and pathway lists to ``export/pathways.mat``.

    Gene positions are taken from the change-distribution pickle
    (``SingleRegion.change_dist_filename``); pathway definitions and the
    pathway lists come from ``pathway_lists``. All indices written to the
    file are one based. The stored ``README_PATHWAYS`` entry describes every
    exported variable.

    Raises:
        KeyError: if a pathway contains a gene that is not in
            ``change_dist.genes``, or a list names a pathway that
            ``read_all_pathways()`` did not return.
    """
    change_dist = load_pickle(SingleRegion.change_dist_filename)
    # NOTE that matlab is one based
    matlab_g2i = {g: (i + 1) for i, g in enumerate(change_dist.genes)}

    pathways = pathway_lists.read_all_pathways()
    # Snapshot the keys once so every per-pathway structure uses one order.
    pathway_names = list(pathways.keys())
    pathway_genes_names = _object_vector(
        list_of_strings_to_matlab_cell_array(pathways[p])
        for p in pathway_names
    )
    pathway_genes_idx = _object_vector(
        np.array([matlab_g2i[g] for g in pathways[p]])
        for p in pathway_names
    )
    # NOTE matlab indexing is one based
    matlab_p2i = {p: (i + 1) for i, p in enumerate(pathway_names)}

    list_names = pathway_lists.all_pathway_lists()
    list_pathway_names = np.empty(len(list_names), dtype=object)
    list_pathway_idx = np.empty(len(list_names), dtype=object)
    for i, listname in enumerate(list_names):
        pathways_in_list = pathway_lists.list_to_pathway_names(listname)
        list_pathway_names[i] = list_of_strings_to_matlab_cell_array(pathways_in_list)
        list_pathway_idx[i] = [matlab_p2i[p] for p in pathways_in_list]

    README = """\
pathway_names: Cell array of all pathway names. The name in cell number k is
the name of the pathway at position k in "pathway_genes_names" and
"pathway_genes_idx".

pathway_genes_names: Cell array (size <n-pathways>). Each cell contains a cell
array of strings which are the gene symbols of the genes in that pathway.

pathway_genes_idx: Same as pathway_genes_names, but each cell in the outer
cell array is now an array of gene indices corresponding to the gene positions
in cube.mat and change-distributions.mat. Hopefully this should be easier to
use in matlab.

list_names: Names of pathway lists prepared by Noa

list_pathway_names: Cell array. One item per list. Each item is a cell array
of strings which are the names of the pathways belonging to that list.

list_pathway_idx: Same as list_pathway_names, but instead of listing the
pathways by name, they are given as indices into the previous pathway_xxx
structures.
"""
    mdict = dict(
        README_PATHWAYS=README,
        pathway_names=list_of_strings_to_matlab_cell_array(pathway_names),
        pathway_genes_names=pathway_genes_names,
        pathway_genes_idx=pathway_genes_idx,
        list_names=list_of_strings_to_matlab_cell_array(list_names),
        list_pathway_names=list_pathway_names,
        list_pathway_idx=list_pathway_idx,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'pathways.mat'))
def export_pathways():
    """Write pathway membership and pathway lists to ``export/pathways.mat``.

    Loads the change-distribution pickle named by
    ``SingleRegion.change_dist_filename`` to obtain gene positions, reads all
    pathways and pathway lists from ``pathway_lists``, and saves them with
    ``save_matfile``. Every index stored in the file is one based. The
    ``README_PATHWAYS`` entry documents each exported variable.

    Raises:
        KeyError: if a pathway gene is missing from ``change_dist.genes`` or
            a list refers to a pathway not returned by
            ``read_all_pathways()``.
    """
    change_dist = load_pickle(SingleRegion.change_dist_filename)
    # NOTE that matlab is one based
    matlab_g2i = {g: (i + 1) for i, g in enumerate(change_dist.genes)}

    pathways = pathway_lists.read_all_pathways()
    # Materialise the keys once: this fixes the order shared by all of the
    # per-pathway structures below.
    pathway_names = list(pathways.keys())

    # Fill pre-allocated object arrays one element at a time. Building them
    # with np.array([...], dtype=object) would collapse into a 2-D array
    # whenever all pathways have the same number of genes.
    n_pathways = len(pathway_names)
    pathway_genes_names = np.empty(n_pathways, dtype=object)
    pathway_genes_idx = np.empty(n_pathways, dtype=object)
    matlab_p2i = {}
    for pos, name in enumerate(pathway_names):
        genes = pathways[name]
        pathway_genes_names[pos] = list_of_strings_to_matlab_cell_array(genes)
        pathway_genes_idx[pos] = np.array([matlab_g2i[g] for g in genes])
        matlab_p2i[name] = pos + 1  # NOTE matlab indexing is one based

    list_names = pathway_lists.all_pathway_lists()
    list_pathway_names = np.empty(len(list_names), dtype=object)
    list_pathway_idx = np.empty(len(list_names), dtype=object)
    for i, listname in enumerate(list_names):
        pathways_in_list = pathway_lists.list_to_pathway_names(listname)
        list_pathway_names[i] = list_of_strings_to_matlab_cell_array(
            pathways_in_list)
        list_pathway_idx[i] = [matlab_p2i[p] for p in pathways_in_list]

    README = """\
pathway_names: Cell array of all pathway names. The name in cell number k is
the name of the pathway at position k in "pathway_genes_names" and
"pathway_genes_idx".

pathway_genes_names: Cell array (size <n-pathways>). Each cell contains a cell
array of strings which are the gene symbols of the genes in that pathway.

pathway_genes_idx: Same as pathway_genes_names, but each cell in the outer
cell array is now an array of gene indices corresponding to the gene positions
in cube.mat and change-distributions.mat. Hopefully this should be easier to
use in matlab.

list_names: Names of pathway lists prepared by Noa

list_pathway_names: Cell array. One item per list. Each item is a cell array
of strings which are the names of the pathways belonging to that list.

list_pathway_idx: Same as list_pathway_names, but instead of listing the
pathways by name, they are given as indices into the previous pathway_xxx
structures.
"""
    mdict = dict(
        README_PATHWAYS=README,
        pathway_names=list_of_strings_to_matlab_cell_array(pathway_names),
        pathway_genes_names=pathway_genes_names,
        pathway_genes_idx=pathway_genes_idx,
        list_names=list_of_strings_to_matlab_cell_array(list_names),
        list_pathway_names=list_pathway_names,
        list_pathway_idx=list_pathway_idx,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'pathways.mat'))
for x in res.res if -np.log10(x.pval) > pval_cutoff ] lines = ["{} {} {}".format(r1, pathway, r2) for r1, pathway, r2, w in vals] save_file(join(results_dir(), "cytoscape", "regions.sif"), lines) lines = ["{} ({}) {} = {}".format(r1, pathway, r2, w) for r1, pathway, r2, w in vals] save_file(join(results_dir(), "cytoscape", "edge_weights.attrs"), ["EdgeWeights"] + lines) ############################################################## # main ############################################################## if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--list", help="Pathways list name. Default=brain_go_num_genes_min_15", default="brain_go_num_genes_min_15", choices=["all"] + pathway_lists.all_pathway_lists(), ) parser.add_argument( "--pval_cutoff", help="Only write edges where the -log(p-value) is above this threshold. Default=0 (use all edges)", default="0", ) args = parser.parse_args() pval_cutoff = float(args.pval_cutoff) timing = RegionPairTiming(args.list) export_cytoscape(timing, pval_cutoff)
def compute_region_ordering(singles):
    """Rank regions by timing within each pathway and write a text report.

    For every pathway returned by ``singles.region_timings_per_pathway()``
    the regions are sorted by ascending ``mu`` (ties fall back to comparing
    the regions themselves). The result is written, one pathway per line, to
    ``dprime-region-ordering-<listname>.txt`` under ``results_dir()``.

    Args:
        singles: object providing ``region_timings_per_pathway()``,
            ``listname`` and ``pathways`` (pathway -> its genes).

    Returns:
        dict mapping each pathway to its list of regions, early to late.
    """
    timings = singles.region_timings_per_pathway()  # pathway -> { r -> mu }
    sorted_timings = {}  # pathway -> list of regions (sorted by mu)
    for pathway, region_to_mu in timings.items():
        by_time = sorted((mu, r) for r, mu in region_to_mu.items())
        sorted_timings[pathway] = [r for mu, r in by_time]

    filename = join(results_dir(), 'dprime-region-ordering-{}.txt'.format(singles.listname))
    print('Saving ordering results to {}'.format(filename))
    with open(filename, 'w') as f:
        header = '{:<60}{:<7}{}'.format('pathway', 'nGenes', 'Regions (early to late)')
        f.write(header + '\n')
        f.write('-' * len(header) + '\n')
        for pathway, ordered_regions in sorted_timings.items():
            # Look the size up with the full name before shortening it.
            pathway_size = len(singles.pathways[pathway])
            # Keep long names inside the 60-character first column.
            label = pathway[:55] + '...' if len(pathway) > 55 else pathway
            f.write('{:<60}{:<7}{}\n'.format(label, pathway_size, ' '.join(ordered_regions)))

    # The script section below assigns the result, so hand the mapping back.
    return sorted_timings


##############################################################
# main
##############################################################
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--list',
                        help='Pathways list name. Default=brain_go_num_genes_min_15',
                        default='brain_go_num_genes_min_15',
                        choices=['all'] + pathway_lists.all_pathway_lists())
    args = parser.parse_args()

    singles = SingleRegion(args.list)
    sorted_timings = compute_region_ordering(singles)
scores.sort(reverse=True) save_scores(singles, scores, order) ############################################################## # main ############################################################## if __name__ == '__main__': cfg.verbosity = 1 parser = argparse.ArgumentParser() parser.add_argument( '--list', help='Pathways list name. Default=brain_go_num_genes_min_15', default='brain_go_num_genes_min_15', choices=['all'] + pathway_lists.all_pathway_lists()) parser.add_argument('--cortex_only', help='Use only cortical regions', action='store_true') parser.add_argument('--draw', help='Draw plot for this pathway and exit') args = parser.parse_args() if args.cortex_only: order = 'V1C A1C S1C M1C DFC MFC OFC'.split() else: order = 'MD STR V1C OFC'.split() singles = SingleRegion(args.list) if args.draw is None: timing_vs_region_order(singles, order) else:
for r in regions: ir = singles.r2i[r] ax.plot(singles.bin_centers, weights[ir,:], linewidth=3, label=r) add_age_ticks(ax, singles.age_scaler) ax.set_yticks([]) ax.set_ylabel('Strength of change', fontsize=cfg.fontsize) ax.legend(frameon=False, fontsize=12) ax.set_title(pathway, fontsize=cfg.fontsize) return fig ############################################################## # main ############################################################## if __name__ == '__main__': cfg.verbosity = 1 parser = argparse.ArgumentParser() parser.add_argument('--list', help='Pathways list name. Default=brain_go_num_genes_min_15', default='brain_go_num_genes_min_15', choices=['all'] + pathway_lists.all_pathway_lists()) parser.add_argument('--regions', help='List (whitespace separated) of regions to plot. Default=all regions') args = parser.parse_args() if args.regions is not None: args.regions = args.regions.split() singles = SingleRegion(args.list) for pathway in singles.pathways.iterkeys(): fig = draw_bumps(singles, pathway, args.regions) dirname = singles.listname if args.regions is not None: dirname = '{}-{}'.format(dirname, '-'.join(args.regions)) filename = join('bumps', dirname, pathway + '.png') save_figure(fig, filename, b_close=True, under_results=True)