def export_pathways():
    """Export pathway and pathway-list definitions to export/pathways.mat.

    The gene ordering is taken from the pickled change distributions
    (``change_dist.genes``); pathways and pathway lists are read through
    ``pathway_lists``. Names are exported together with one-based indices so
    they can be used directly from matlab. The README_PATHWAYS entry saved in
    the file describes every exported variable.
    """
    change_dist = load_pickle(SingleRegion.change_dist_filename)
    matlab_g2i = {g:(i+1) for i,g in enumerate(change_dist.genes)} # NOTE that matlab is one based

    pathways = pathway_lists.read_all_pathways()
    pathway_names = list(pathways.keys()) # materialize once so the order stays fixed
    n_pathways = len(pathway_names)

    # Fill pre-allocated 1-D object arrays instead of calling
    # np.array([...], dtype=object): when every pathway has the same number of
    # genes (e.g. there is only one pathway) np.array would stack the inner
    # arrays into a single 2-D array rather than keeping one cell per pathway.
    pathway_genes_names = np.empty(n_pathways, dtype=object)
    pathway_genes_idx = np.empty(n_pathways, dtype=object)
    for i,p in enumerate(pathway_names):
        pathway_genes_names[i] = list_of_strings_to_matlab_cell_array(pathways[p])
        pathway_genes_idx[i] = np.array([matlab_g2i[g] for g in pathways[p]])

    matlab_p2i = {p:(i+1) for i,p in enumerate(pathway_names)} # NOTE matlab indexing is one based
    list_names = pathway_lists.all_pathway_lists()
    list_pathway_names = np.empty(len(list_names), dtype=object)
    list_pathway_idx = np.empty(len(list_names), dtype=object)
    for i,listname in enumerate(list_names):
        pathways_in_list = pathway_lists.list_to_pathway_names(listname)
        list_pathway_names[i] = list_of_strings_to_matlab_cell_array(pathways_in_list)
        list_pathway_idx[i] = [matlab_p2i[p] for p in pathways_in_list]
    README = """\
pathway_names:
Cell array of all pathway names. The name in cell number k is the name of the
pathway at position k in "pathway_genes_names" and "pathway_genes_idx".

pathway_genes_names:
Cell array (size <n-pathways>). Each cell contains a cell array of strings which 
are the gene symbols of the genes in that pathway.

pathway_genes_idx:
Same as pathway_genes_names, but each cell in the outer cell array is now an 
array of gene indices corresponding to the gene positions in cube.mat and change-distributions.mat.
Hopefully this should be easier to use in matlab.

list_names:
Names of pathway lists prepared by Noa

list_pathway_names:
Cell array. One item per list. Each item is a cell array of strings which are 
the names of the pathways belonging to that list.

list_pathway_idx:
Same as list_pathway_names, but instead of listing the pathways by name, they 
are given as indices into the previous pathway_xxx structures.
"""
    mdict = dict(
        README_PATHWAYS = README,
        pathway_names = list_of_strings_to_matlab_cell_array(pathway_names),
        pathway_genes_names = pathway_genes_names,
        pathway_genes_idx = pathway_genes_idx,
        list_names = list_of_strings_to_matlab_cell_array(list_names),
        list_pathway_names = list_pathway_names,
        list_pathway_idx = list_pathway_idx,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'pathways.mat'))
Example #2
0
def _as_object_vector(items):
    """Pack ``items`` into a 1-D object array holding one item per cell."""
    # Element-wise assignment keeps every item intact; np.array(items,
    # dtype=object) would merge equally sized inner arrays into one 2-D array.
    vector = np.empty(len(items), dtype=object)
    for i, item in enumerate(items):
        vector[i] = item
    return vector


def export_pathways():
    """Save pathway and pathway-list definitions to export/pathways.mat.

    Gene positions come from ``change_dist.genes`` of the pickled change
    distributions, pathways and pathway lists from ``pathway_lists``. All
    indices written to the file are one-based for use in matlab. The saved
    README_PATHWAYS string documents each exported variable.
    """
    change_dist = load_pickle(SingleRegion.change_dist_filename)
    # NOTE that matlab is one based
    matlab_g2i = {g: (i + 1) for i, g in enumerate(change_dist.genes)}

    pathways = pathway_lists.read_all_pathways()
    # Snapshot the names so the order stays fixed for everything below.
    pathway_names = list(pathways.keys())
    # One cell per pathway, even when all pathways have the same gene count.
    pathway_genes_names = _as_object_vector([
        list_of_strings_to_matlab_cell_array(pathways[p])
        for p in pathway_names
    ])
    pathway_genes_idx = _as_object_vector([
        np.array([matlab_g2i[g] for g in pathways[p]]) for p in pathway_names
    ])

    # NOTE matlab indexing is one based
    matlab_p2i = {p: (i + 1) for i, p in enumerate(pathway_names)}
    list_names = pathway_lists.all_pathway_lists()
    list_pathway_names = np.empty(len(list_names), dtype=object)
    list_pathway_idx = np.empty(len(list_names), dtype=object)
    for i, listname in enumerate(list_names):
        pathways_in_list = pathway_lists.list_to_pathway_names(listname)
        list_pathway_names[i] = list_of_strings_to_matlab_cell_array(
            pathways_in_list)
        list_pathway_idx[i] = [matlab_p2i[p] for p in pathways_in_list]
    README = """\
pathway_names:
Cell array of all pathway names. The name in cell number k is the name of the
pathway at position k in "pathway_genes_names" and "pathway_genes_idx".

pathway_genes_names:
Cell array (size <n-pathways>). Each cell contains a cell array of strings which 
are the gene symbols of the genes in that pathway.

pathway_genes_idx:
Same as pathway_genes_names, but each cell in the outer cell array is now an 
array of gene indices corresponding to the gene positions in cube.mat and change-distributions.mat.
Hopefully this should be easier to use in matlab.

list_names:
Names of pathway lists prepared by Noa

list_pathway_names:
Cell array. One item per list. Each item is a cell array of strings which are 
the names of the pathways belonging to that list.

list_pathway_idx:
Same as list_pathway_names, but instead of listing the pathways by name, they 
are given as indices into the previous pathway_xxx structures.
"""
    mdict = dict(
        README_PATHWAYS=README,
        pathway_names=list_of_strings_to_matlab_cell_array(pathway_names),
        pathway_genes_names=pathway_genes_names,
        pathway_genes_idx=pathway_genes_idx,
        list_names=list_of_strings_to_matlab_cell_array(list_names),
        list_pathway_names=list_pathway_names,
        list_pathway_idx=list_pathway_idx,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'pathways.mat'))
        for x in res.res
        if -np.log10(x.pval) > pval_cutoff
    ]

    lines = ["{} {} {}".format(r1, pathway, r2) for r1, pathway, r2, w in vals]
    save_file(join(results_dir(), "cytoscape", "regions.sif"), lines)
    lines = ["{} ({}) {} = {}".format(r1, pathway, r2, w) for r1, pathway, r2, w in vals]
    save_file(join(results_dir(), "cytoscape", "edge_weights.attrs"), ["EdgeWeights"] + lines)


##############################################################
# main
##############################################################
if __name__ == "__main__":
    # Script entry point: parse the pathway list name and the edge cutoff,
    # build the RegionPairTiming for that list and hand both to
    # export_cytoscape.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--list",
        help="Pathways list name. Default=brain_go_num_genes_min_15",
        default="brain_go_num_genes_min_15",
        choices=["all"] + pathway_lists.all_pathway_lists(),
    )
    parser.add_argument(
        "--pval_cutoff",
        # The edge filter visible in this file compares -np.log10(pval) with
        # the cutoff, so the help text names the log base explicitly.
        help="Only write edges where the -log10(p-value) is above this threshold. Default=0 (use all edges)",
        # Let argparse do the conversion: a non-numeric value now yields a
        # usage error instead of a ValueError traceback after parsing.
        type=float,
        default=0.0,
    )
    args = parser.parse_args()
    pval_cutoff = args.pval_cutoff
    timing = RegionPairTiming(args.list)
    export_cytoscape(timing, pval_cutoff)
def compute_region_ordering(singles):
    """Order the regions of every pathway by timing and save a text report.

    ``singles.region_timings_per_pathway()`` provides, per pathway, a mapping
    region -> mu. For each pathway the regions are sorted by ascending mu and
    one line per pathway (name, number of genes, ordered regions) is written
    to ``dprime-region-ordering-<listname>.txt`` under ``results_dir()``.

    Returns:
        dict mapping pathway -> list of regions sorted by mu (early to late).
        The original version returned nothing although its caller stores the
        result.
    """
    timings = singles.region_timings_per_pathway() # pathway -> { r -> mu }
    sorted_timings = {} # pathway -> list of regions (sorted by mu)
    for pathway, dct in timings.items():
        # sorting (mu, region) pairs orders by mu; equal mu falls back to the region
        sorted_regions_and_times = sorted((mu,r) for r,mu in dct.items())
        sorted_timings[pathway] = [r for mu,r in sorted_regions_and_times]

    filename = join(results_dir(), 'dprime-region-ordering-{}.txt'.format(singles.listname))
    print('Saving ordering results to {}'.format(filename))
    with open(filename,'w') as f:
        header = '{:<60}{:<7}{}'.format('pathway', 'nGenes', 'Regions (early to late)')
        f.write(header + '\n')
        f.write('-'*len(header) + '\n')
        for pathway, ordered_regions in sorted_timings.items():
            pathway_size = len(singles.pathways[pathway])
            # keep long names inside the 60 character wide name column
            display_name = pathway if len(pathway) <= 55 else pathway[:55] + '...'
            f.write('{:<60}{:<7}{}\n'.format(display_name, pathway_size, ' '.join(ordered_regions)))
    return sorted_timings

##############################################################
# main
##############################################################
if __name__ == '__main__':
    # Script entry point: read the pathway list name from the command line,
    # construct SingleRegion for it and pass it to compute_region_ordering.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--list',
        default='brain_go_num_genes_min_15',
        choices=['all'] + pathway_lists.all_pathway_lists(),
        help='Pathways list name. Default=brain_go_num_genes_min_15',
    )
    args = parser.parse_args()

    singles = SingleRegion(args.list)
    sorted_timings = compute_region_ordering(singles)
    scores.sort(reverse=True)
    save_scores(singles, scores, order)


##############################################################
# main
##############################################################
if __name__ == '__main__':
    cfg.verbosity = 1

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--list',
        help='Pathways list name. Default=brain_go_num_genes_min_15',
        default='brain_go_num_genes_min_15',
        choices=['all'] + pathway_lists.all_pathway_lists())
    parser.add_argument('--cortex_only',
                        help='Use only cortical regions',
                        action='store_true')
    parser.add_argument('--draw', help='Draw plot for this pathway and exit')
    args = parser.parse_args()

    if args.cortex_only:
        order = 'V1C A1C S1C M1C DFC MFC OFC'.split()
    else:
        order = 'MD STR V1C OFC'.split()

    singles = SingleRegion(args.list)
    if args.draw is None:
        timing_vs_region_order(singles, order)
    else:
Example #6
0
    for r in regions:
        ir = singles.r2i[r]
        ax.plot(singles.bin_centers, weights[ir,:], linewidth=3, label=r)
    add_age_ticks(ax, singles.age_scaler)
    ax.set_yticks([])
    ax.set_ylabel('Strength of change', fontsize=cfg.fontsize)
    ax.legend(frameon=False, fontsize=12)
    ax.set_title(pathway, fontsize=cfg.fontsize)
    return fig

##############################################################
# main
##############################################################
if __name__ == '__main__':
    # Script entry point: draw one figure per pathway with draw_bumps and save
    # it as bumps/<listname>[-<regions>]/<pathway>.png via save_figure.
    cfg.verbosity = 1
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--list',
        help='Pathways list name. Default=brain_go_num_genes_min_15',
        default='brain_go_num_genes_min_15',
        choices=['all'] + pathway_lists.all_pathway_lists(),
    )
    parser.add_argument(
        '--regions',
        help='List  (whitespace separated) of regions to plot. Default=all regions',
    )
    args = parser.parse_args()
    # --regions arrives as one string; turn it into a list of region names
    if args.regions is not None:
        args.regions = args.regions.split()

    singles = SingleRegion(args.list)
    for pathway in singles.pathways.iterkeys():
        fig = draw_bumps(singles, pathway, args.regions)
        # a region selection gets its own output directory
        if args.regions is None:
            dirname = singles.listname
        else:
            dirname = '{}-{}'.format(singles.listname, '-'.join(args.regions))
        filename = join('bumps', dirname, pathway + '.png')
        save_figure(fig, filename, b_close=True, under_results=True)