def __init__(self, run, cancer, data_type, patients=None, drop_pc1=False,
             create_real_features=True, create_meta_features=True,
             filter_down=True, draw_figures=False):
    '''
    Read one data matrix for a cancer and derive its feature tables.

    Parameters
    ----------
    run : object providing `data_path` and `gene_sets`.
    cancer : object providing `path` and `name`.
    data_type : passed to both Dataset.__init__ and IM.read_data.
    patients : optional column labels used to subset the matrix.
        When None, the columns of `self.df.xs('01', 1, 1)` are used as
        the patient list instead.
    drop_pc1 : forwarded unchanged to self._calc_global_pcs.
    create_real_features : self._get_real_features() runs only when
        this is exactly True.
    create_meta_features : self._get_meta_features(run.gene_sets,
        filter_down) runs only when this is exactly True.
    filter_down : forwarded unchanged to self._get_meta_features.
    draw_figures : self._creat_pathway_figures() runs only when this is
        exactly True.
    '''
    Dataset.__init__(self, cancer.path, data_type, compressed=True)

    self.df = IM.read_data(run.data_path, cancer.name, data_type,
                           tissue_code='All')

    if patients is None:
        self.patients = self.df.xs('01', 1, 1).columns
    else:
        # Keep the requested columns, then drop those with no data at all.
        requested = self.df.ix[:, patients]
        self.df = requested.dropna(axis=1, how='all')
        # The requested list is stored as given, not the surviving columns.
        self.patients = patients

    self.global_vars = pd.DataFrame(index=self.patients)
    self.features = {}
    self.global_loadings = pd.DataFrame(index=self.df.index)
    self._calc_global_pcs(drop_pc1)

    # The flags are compared by identity: truthy values other than True
    # do not trigger these steps.
    if create_real_features is True:
        self._get_real_features()
    if create_meta_features is True:
        gene_sets = run.gene_sets
        self._get_meta_features(gene_sets, filter_down)

    # self.features is a dict up to this point (presumably filled by the
    # helpers above -- confirm); it is replaced by its concatenation.
    feature_tables = self.features
    self.features = pd.concat(feature_tables)

    if draw_figures is True:
        self._creat_pathway_figures()
def __init__(self, run, cancer, data_type, patients=None, drop_pc1=False,
             create_real_features=True, create_meta_features=True,
             filter_down=True, draw_figures=False):
    '''
    Load a data matrix for one cancer and build the feature tables.

    The matrix comes from IM.read_data with tissue_code='All'.  If
    `patients` is supplied the matrix is cut down to those columns and
    columns that are entirely missing are dropped; if it is None the
    patient list is taken from the columns of self.df.xs('01', 1, 1).

    `drop_pc1` goes to self._calc_global_pcs and `filter_down` goes to
    self._get_meta_features.  Each of `create_real_features`,
    `create_meta_features` and `draw_figures` enables its step only when
    the argument is exactly True.
    '''
    Dataset.__init__(self, cancer.path, data_type, compressed=True)

    self.df = IM.read_data(
        run.data_path,
        cancer.name,
        data_type,
        tissue_code='All',
    )

    if patients is None:
        tumor_view = self.df.xs('01', 1, 1)
        self.patients = tumor_view.columns
    else:
        self.df = self.df.ix[:, patients].dropna(axis=1, how='all')
        # Stored as passed in, even if some columns were dropped above.
        self.patients = patients

    patient_index = self.patients
    row_index = self.df.index
    self.global_vars = pd.DataFrame(index=patient_index)
    self.features = {}
    self.global_loadings = pd.DataFrame(index=row_index)

    self._calc_global_pcs(drop_pc1)

    # Identity checks against True are intentional and preserved.
    if create_real_features is True:
        self._get_real_features()

    if create_meta_features is True:
        self._get_meta_features(run.gene_sets, filter_down)

    # Replace the dict of feature tables with a single concatenated object.
    self.features = pd.concat(self.features)

    if draw_figures is True:
        self._creat_pathway_figures()
def __init__(self, run, cancer, cn_type, patients=None):
    '''
    Set up a copy-number dataset; only cn_type 'CN_broad' loads data.

    run : object providing `parameters['min_patients']` and `data_path`.
    cancer : object providing `path` and `name`.
    cn_type : passed to Dataset.__init__; compared against 'CN_broad'.
    patients : optional column labels; when given, the matrix is cut down
        to them and columns that are entirely missing are dropped.
    '''
    Dataset.__init__(self, cancer.path, cn_type, compressed=False)

    # Looked up before the type check, so a missing key raises for any
    # cn_type.
    min_pat = run.parameters['min_patients']

    # NOTE(review): the patient filter and the features assignment are
    # taken to belong to the 'CN_broad' case, the only place self.df is
    # assigned in this method -- confirm.
    if cn_type != 'CN_broad':
        return

    gistic = get_gistic(run.data_path, cancer.name, min_patients=min_pat)
    self.df = gistic
    if patients is not None:
        self.df = self.df.ix[:, patients].dropna(1, how='all')

    # The feature table is the (possibly filtered) matrix itself.
    self.features = self.df
def __init__(self, run, cancer, cn_type, patients=None):
    '''
    Initialise a copy-number dataset; data is loaded for 'CN_broad' only.

    run : object providing `parameters['min_patients']` and `data_path`.
    cancer : object providing `path` and `name`.
    cn_type : passed to Dataset.__init__; compared against 'CN_broad'.
    patients : optional column labels used to subset the matrix, after
        which columns that are entirely missing are dropped.
    '''
    Dataset.__init__(self, cancer.path, cn_type, compressed=False)

    # Read unconditionally, exactly as before the type check.
    min_pat = run.parameters['min_patients']

    # NOTE(review): everything below is assumed to apply only to the
    # 'CN_broad' case, since self.df is assigned nowhere else in this
    # method -- confirm.
    if cn_type != 'CN_broad':
        return

    self.df = FH.get_gistic(
        run.data_path,
        cancer.name,
        min_patients=min_pat,
    )

    if patients is not None:
        selected = self.df.ix[:, patients]
        self.df = selected.dropna(1, how='all')

    # Features and data matrix are the same object for this dataset.
    self.features = self.df
def __init__(self, run, cancer, patients=None, create_features=True,
             draw_figures=False):
    """
    Load the mutation matrix for one cancer and optionally build features.

    run : object providing `data_path`, `gene_sets` and
        `parameters['min_patients']`.
    cancer : object providing `path` and `name`.
    patients : optional column labels; when given, the matrix is cut down
        to them and columns that are entirely missing are dropped.
    create_features : self._create_feature_matrix(run.gene_sets, min_pat)
        runs only when this is exactly True.
    draw_figures : self._create_pathway_figures(run.gene_sets) runs only
        when this is exactly True.
    """
    Dataset.__init__(self, cancer.path, 'Mutation', compressed=False)

    self.df = FH.get_mutation_matrix(run.data_path, cancer.name)
    if patients is not None:
        selected = self.df.ix[:, patients]
        self.df = selected.dropna(1, how='all')

    # Flags are compared by identity; other truthy values are ignored.
    if create_features is True:
        # The threshold is looked up only when features are requested.
        min_pat = run.parameters['min_patients']
        self._create_feature_matrix(run.gene_sets, min_pat)

    if draw_figures is True:
        self._create_pathway_figures(run.gene_sets)
# Script fragment: loads a pickled run object and a mutation dataset, then
# builds per-gene and per-gene-set hit matrices.
# NOTE(review): `sys` and `pickle` are used below but their imports are not
# part of this fragment -- presumably imported earlier; confirm.
from pandas import Series, DataFrame
from matplotlib.pylab import savefig
import matplotlib.pyplot as plt

from Data.Containers import Dataset
from Reports.Figures import pathway_plot

# Positional command-line arguments: report directory, then cancer type.
report_path = sys.argv[1]
cancer_type = sys.argv[2]
data_type = 'MAF'

'''Load in run and mutation data'''
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with trusted report directories. The file handle is never explicitly closed.
run = pickle.load(open(report_path + '/RunObject.p', 'rb'))
cancer = run.load_cancer(cancer_type)
mut = Dataset(cancer, run, data_type)

'''Create hit_matrix and meta_matrix, filter out genes for features'''
# Missing entries become 0 and every value is capped at 1.
hit_matrix = mut.df.fillna(0).clip_upper(1.)
# One row per gene set (after the transpose): the column-wise sum of the rows
# of mut.df selected by that set's members.
meta_matrix = DataFrame({p: mut.df.ix[g].sum() for p,g in run.gene_sets.iteritems()}).T
meta_matrix = meta_matrix.fillna(0).clip_upper(1.)

def size_filter(s):
    '''Make sure features covers a minimum number of patients'''
    min_p = run.parameters['min_patients']
    # Row sums are tested for membership in the integer range
    # [min_p, meta_matrix.shape[1] - min_p); the upper bound always uses
    # meta_matrix's column count, whatever `s` is. Non-integer sums never
    # match.
    return s.sum(1).isin(range(min_p, meta_matrix.shape[1] - min_p))

def is_one_gene(p):
    '''Test to see if most mutations are due to single gene'''
    # Per-gene row sums of hit_matrix for the members of gene set `p`, sorted.
    # NOTE(review): the visible text ends here; the rest of this function
    # (including any return) is not part of this fragment.
    counts = hit_matrix.ix[run.gene_sets[p]].sum(1).dropna().order()
# Script fragment: loads a pickled run object and prepares a copy-number
# dataset according to the data type given on the command line.
# NOTE(review): `sys`, `pickle` and `Dataset` are used below but their imports
# are not part of this fragment -- presumably imported earlier; confirm.
from Reports.Figures import pathway_plot
from Processing.Helpers import merge_redundant

# Positional command-line arguments: report directory, cancer type, data type.
report_path = sys.argv[1]
cancer_type = sys.argv[2]
data_type = sys.argv[3]
# Drop the first three characters (presumably a 'CN_' prefix -- confirm).
data_type = data_type[3:]

'''Load in run and CN data'''
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with trusted report directories. The file handle is never explicitly closed.
run = pickle.load(open(report_path + '/RunObject.p', 'rb'))
cancer = run.load_cancer(cancer_type)

# 'broad' data is saved with the data matrix itself as the feature table, and
# the script exits immediately afterwards.
if data_type == 'broad':
    data = Dataset(cancer, run, 'CN_broad')
    data.features = data.df
    data.save()
    sys.exit(0)

data = Dataset(cancer, run, 'CN')
# Append the data type to the dataset path.
data.path = '_'.join([data.path, data_type])

# Choose the value treated as a hit for each data type.
# NOTE(review): the visible text ends inside this chain; further branches may
# follow.
if data_type == 'deletion':
    data.hit_val = -2
elif data_type == 'amplification':
    data.hit_val = 2
elif data_type == 'amplification_low':
    # Values of 1 are rewritten to 2 so that they also match hit_val.
    data.df = data.df.replace(1,2)
    data.hit_val = 2