# NOTE(review): the statements down to `return` need an enclosing function
# whose `def` line is outside this view -- presumably `is_one_gene(p)`, which
# is called just below. The source arrived with line breaks and indentation
# stripped; the layout here is reconstructed -- confirm against the original.
    '''Test to see if most mutations are due to single gene'''
    # Per-row totals over the rows of hit_matrix selected by gene set `p`,
    # with NaN totals dropped, then sorted (`.ix` and `.order()` are legacy
    # pandas spellings).
    counts = hit_matrix.ix[run.gene_sets[p]].sum(1).dropna().order()
    # Column totals over the same rows, capped at 1 and added up. With
    # non-negative entries this is the number of columns that have at least
    # one hit in the set (presumably one column per sample -- TODO confirm).
    with_top = hit_matrix.ix[run.gene_sets[p]].sum().clip_upper(1).sum()
    # Same count after removing the label with the largest total in `counts`
    # from the set (the `-` with a set literal implies gene_sets[p] is a set).
    without = hit_matrix.ix[run.gene_sets[p] - {counts.idxmax()}].sum().clip_upper(1).sum()
    # True when including the top row raises the count by more than 0.5
    # relative to the count without it.
    # NOTE(review): `without == 0` is not guarded, and with integer operands
    # under Python 2 `/` would floor-divide -- confirm dtype / division mode.
    return ((with_top - without) / without) > .5

# Keep only the entries of meta_matrix that pass size_filter (defined
# elsewhere; its criterion is not visible here).
meta_matrix = meta_matrix[size_filter(meta_matrix)]
# One boolean per remaining meta_matrix row label, from is_one_gene.
s = Series({p: is_one_gene(p) for p in meta_matrix.index})
# Keep the rows for which is_one_gene returned False.
meta_matrix = meta_matrix.ix[s==False]
# Apply the same size_filter to hit_matrix.
hit_matrix = hit_matrix[size_filter(hit_matrix)]

'''Add passing features to the Data Object'''
# Rows of meta_matrix followed by rows of hit_matrix.
mut.features = meta_matrix.append(hit_matrix)
# NOTE(review): compress() is immediately followed by uncompress(); both are
# defined on the Data Object class, which is not visible here -- confirm this
# round trip is intended.
mut.compress()
mut.uncompress()

'''Save updated Data Object (with additional features field'''
mut.save()
# presumably save() leaves the object compressed, hence the uncompress()
# before mut.df is read below -- verify against the Data Object class.
mut.uncompress()

'''Draw pathway_plots for pathway level features'''
# Feature labels that are also keys of run.gene_sets.
meta_features = [f for f in mut.features.index if f in run.gene_sets]
pathway_plot_folder = mut.path + '/Figures/PathwayPlots/'
# Create the output folder on first use.
if not os.path.isdir(pathway_plot_folder):
    os.makedirs(pathway_plot_folder)
for i,p in enumerate(meta_features):
    # Rows of mut.df selected by gene set `p`.
    df = mut.df.ix[run.gene_sets[p]]
    pathway_plot(df)
    # Written under the bare feature name: no file extension is appended here.
    savefig(pathway_plot_folder + p)
# NOTE(review): the source arrived with line breaks and indentation stripped;
# the layout here is reconstructed -- confirm against the original.

# Keep only the entries of meta_matrix that pass size_filter (defined
# elsewhere; its criterion is not visible here).
meta_matrix = meta_matrix[size_filter(meta_matrix)]
# One boolean per remaining meta_matrix row label, from is_one_gene.
s = Series({p: is_one_gene(p) for p in meta_matrix.index})
# Keep the rows for which is_one_gene returned False.
meta_matrix = meta_matrix.ix[s==False]
# Apply the same size_filter to hit_matrix.
hit_matrix = hit_matrix[size_filter(hit_matrix)]

# Work on a copy of hit_matrix whose index is reduced to its level-0 values
# (presumably the gene label -- TODO confirm), leaving hit_matrix untouched.
hit_genes = hit_matrix.copy()
hit_genes.index = hit_genes.index.get_level_values(0)
# Rows of hit_genes followed by rows of meta_matrix, passed through
# merge_redundant (defined elsewhere; its merge rule is not visible here).
non_redundant = merge_redundant(hit_genes.append(meta_matrix))

'''Add passing features to the Data Object'''
data.features = non_redundant

'''Save updated Data Object (with additional features field)'''
# Values 1 and -1 in data.df are replaced by 0 before saving.
data.df = data.df.replace([1,-1], 0)
data.save()
# presumably save() leaves the object compressed, hence the uncompress()
# before data.df is read below -- verify against the Data Object class.
data.uncompress()

'''Draw pathway_plots for pathway level features'''
# Feature labels that are also keys of run.gene_sets.
meta_features = [f for f in data.features.index if f in run.gene_sets]
pathway_plot_folder = data.path + '/Figures/PathwayPlots/'
# Create the output folder on first use.
if not os.path.isdir(pathway_plot_folder):
    os.makedirs(pathway_plot_folder)

# Copy of data.df indexed by the level-2 values of its index, turned into a
# 0.0/1.0 matrix marking the cells equal to data.hit_val.
hit_mat = data.df.copy()
hit_mat.index = hit_mat.index.get_level_values(2)
hit_mat = (hit_mat == data.hit_val).astype(float)
for i,p in enumerate(meta_features):
    # Skip features whose plot file already exists.
    # NOTE(review): the rest of this loop body lies beyond the end of this view.
    if os.path.isfile(pathway_plot_folder + p):
        continue