Esempio n. 1
0
 def __init__(self, run, cancer, data_type, patients=None, drop_pc1=False,
              create_real_features=True, create_meta_features=True,
              filter_down=True, draw_figures=False):
     '''
     Read the data matrix for one cancer / data type and build its features.

     run -- supplies data_path (where to read from) and gene_sets.
     cancer -- supplies path (handed to Dataset.__init__) and name.
     data_type -- forwarded to Dataset.__init__ and IM.read_data.
     patients -- optional column selection.  When given, the matrix is
         restricted to it and columns that are entirely NaN are dropped.
         When omitted, self.patients is taken from the '01' cross-section
         of column level 1.
     drop_pc1 -- forwarded unchanged to _calc_global_pcs.
     create_real_features, create_meta_features, draw_figures -- each
         optional step runs only when its flag is exactly True.
     filter_down -- forwarded unchanged to _get_meta_features.
     '''
     Dataset.__init__(self, cancer.path, data_type, compressed=True)
     self.df = IM.read_data(run.data_path, cancer.name, data_type,
                            tissue_code='All')

     if patients is None:
         # '01' is presumably the primary-tumour tissue code -- confirm.
         code_01_view = self.df.xs('01', axis=1, level=1)
         self.patients = code_01_view.columns
     else:
         requested = self.df.ix[:, patients]
         self.df = requested.dropna(axis=1, how='all')
         self.patients = patients

     # Empty containers that must exist before the global PC step runs.
     self.features = {}
     self.global_vars = pd.DataFrame(index=self.patients)
     self.global_loadings = pd.DataFrame(index=self.df.index)
     self._calc_global_pcs(drop_pc1)

     # Identity checks are deliberate: a merely truthy value (1, 'yes')
     # does not switch a step on.
     if create_real_features is True:
         self._get_real_features()
     if create_meta_features is True:
         self._get_meta_features(run.gene_sets, filter_down)

     # Collapse the dict of feature tables into one pandas object.
     # NOTE(review): pd.concat raises on an empty dict -- confirm that at
     # least one step above always populates self.features.
     self.features = pd.concat(self.features)

     if draw_figures is True:
         self._creat_pathway_figures()
Esempio n. 2
0
 def __init__(self, run, cancer, data_type, patients=None, drop_pc1=False,
              create_real_features=True, create_meta_features=True,
              filter_down=True, draw_figures=False):
     '''
     Load a data matrix through IM.read_data and derive feature tables.

     The matrix is read with tissue_code='All'.  If patients is supplied
     the matrix is cut down to those columns (all-NaN columns removed)
     and patients is stored as-is; otherwise self.patients becomes the
     column labels of the '01' cross-section on column level 1.

     drop_pc1 is handed to _calc_global_pcs and filter_down to
     _get_meta_features.  The three boolean switches
     (create_real_features, create_meta_features, draw_figures) are
     compared with "is True", so only the literal True enables a step.
     '''
     Dataset.__init__(self, cancer.path, data_type, compressed=True)
     self.df = IM.read_data(run.data_path, cancer.name, data_type,
                            tissue_code='All')

     use_all_patients = patients is None
     if use_all_patients:
         # Key '01' on column level 1 -- presumably a tissue code; verify
         # against IM.read_data.
         self.patients = self.df.xs('01', axis=1, level=1).columns
     else:
         self.df = self.df.ix[:, patients].dropna(axis=1, how='all')
         self.patients = patients

     # Holders indexed by patient / by matrix row, created before
     # _calc_global_pcs is called.
     self.global_loadings = pd.DataFrame(index=self.df.index)
     self.global_vars = pd.DataFrame(index=self.patients)
     self.features = {}
     self._calc_global_pcs(drop_pc1)

     if create_real_features is True:
         self._get_real_features()

     if create_meta_features is True:
         self._get_meta_features(run.gene_sets, filter_down)

     # self.features changes type here: dict in, concatenated object out.
     self.features = pd.concat(self.features)

     if draw_figures is True:
         self._creat_pathway_figures()
Esempio n. 3
0
 def __init__(self, run, cancer, cn_type, patients=None):
     '''
     Set up a copy-number dataset; only 'CN_broad' loads any data here.

     run -- supplies parameters['min_patients'] and data_path.
     cancer -- supplies path (for Dataset.__init__) and name.
     cn_type -- data-type label; for any value other than 'CN_broad'
         neither self.df nor self.features is assigned by this method.
     patients -- optional column selection applied to the loaded table;
         columns left entirely NaN are dropped.
     '''
     Dataset.__init__(self, cancer.path, cn_type, compressed=False)
     # Looked up unconditionally, so a missing key fails for every cn_type.
     patient_floor = run.parameters['min_patients']
     if cn_type == 'CN_broad':
         self.df = get_gistic(run.data_path, cancer.name,
                              min_patients=patient_floor)
         if patients is not None:
             chosen = self.df.ix[:, patients]
             self.df = chosen.dropna(axis=1, how='all')
         # The table is used directly as the feature matrix.
         self.features = self.df
Esempio n. 4
0
 def __init__(self, run, cancer, cn_type, patients=None):
     '''
     Initialise the dataset and, for 'CN_broad', load the GISTIC table.

     The table comes from FH.get_gistic with the run's 'min_patients'
     parameter.  When patients is given, the table is limited to those
     columns and columns containing only NaN are discarded.  The
     resulting table is stored as both self.df and self.features.  Other
     cn_type values leave both attributes untouched.
     '''
     Dataset.__init__(self, cancer.path, cn_type, compressed=False)
     min_required = run.parameters['min_patients']
     is_broad = cn_type == 'CN_broad'
     if is_broad:
         self.df = FH.get_gistic(run.data_path, cancer.name,
                                 min_patients=min_required)
         if patients is not None:
             self.df = self.df.ix[:, patients].dropna(axis=1, how='all')
         self.features = self.df
Esempio n. 5
0
    def __init__(self, run, cancer, patients=None,
                 create_features=True, draw_figures=False):
        """
        Load the mutation matrix for a cancer and optionally build features.

        run -- supplies data_path, gene_sets and parameters['min_patients'].
        cancer -- supplies path (for Dataset.__init__) and name.
        patients -- optional column selection; all-NaN columns are dropped
            after selecting.
        create_features -- when exactly True, call _create_feature_matrix.
        draw_figures -- when exactly True, call _create_pathway_figures.
        """
        Dataset.__init__(self, cancer.path, 'Mutation', compressed=False)
        self.df = FH.get_mutation_matrix(run.data_path, cancer.name)

        if patients is not None:
            picked = self.df.ix[:, patients]
            self.df = picked.dropna(axis=1, how='all')

        # "is True" on purpose: truthy non-bool values do not enable a step.
        if create_features is True:
            patient_floor = run.parameters['min_patients']
            self._create_feature_matrix(run.gene_sets, patient_floor)

        if draw_figures is True:
            self._create_pathway_figures(run.gene_sets)
Esempio n. 6
0
    def __init__(self,
                 run,
                 cancer,
                 patients=None,
                 create_features=True,
                 draw_figures=False):
        """
        Build a 'Mutation' dataset from FH.get_mutation_matrix.

        If patients is provided, only those columns are kept and any column
        that is NaN throughout is removed.  Feature construction
        (_create_feature_matrix, using the run's gene sets and its
        'min_patients' parameter) and figure drawing
        (_create_pathway_figures) each run only when the matching flag is
        the literal True.
        """
        Dataset.__init__(self, cancer.path, 'Mutation', compressed=False)
        self.df = FH.get_mutation_matrix(run.data_path, cancer.name)

        restrict_columns = patients is not None
        if restrict_columns:
            self.df = self.df.ix[:, patients].dropna(axis=1, how='all')

        gene_sets = run.gene_sets

        if create_features is True:
            min_required = run.parameters['min_patients']
            self._create_feature_matrix(gene_sets, min_required)

        if draw_figures is True:
            self._create_pathway_figures(gene_sets)
Esempio n. 7
0
from pandas import Series, DataFrame
from matplotlib.pylab import savefig
import matplotlib.pyplot as plt

from Data.Containers import Dataset
from Reports.Figures import pathway_plot

# Command-line arguments: report directory and cancer type.
report_path = sys.argv[1]
cancer_type = sys.argv[2]
data_type = 'MAF'

# Load in run and mutation data.
# The file is opened in a `with` block so the handle is closed as soon as
# unpickling finishes instead of being left to the garbage collector.
# NOTE: pickle can execute arbitrary code while loading; only point this at
# RunObject.p files written by a trusted pipeline run.
with open(report_path + '/RunObject.p', 'rb') as run_file:
    run = pickle.load(run_file)
cancer = run.load_cancer(cancer_type)
mut = Dataset(cancer, run, data_type)

# Create hit_matrix and meta_matrix, filter out genes for features.
# Both matrices are NaN-filled with 0 and capped at 1.
hit_matrix = mut.df.fillna(0).clip_upper(1.)
# One row per gene set: the column-wise sum over the rows of mut.df
# selected by that set's members.
meta_matrix = DataFrame({p: mut.df.ix[g].sum() for p,g in 
                         run.gene_sets.iteritems()}).T
meta_matrix = meta_matrix.fillna(0).clip_upper(1.)

def size_filter(s):
    '''Flag the rows of s whose row total is an acceptable patient count.

    A total is acceptable when it equals one of the integers from
    min_patients up to, but not including, the number of columns of
    meta_matrix minus min_patients.  Returns the boolean result of isin
    on the row totals.
    '''
    floor = run.parameters['min_patients']
    ceiling = meta_matrix.shape[1] - floor
    allowed_totals = range(floor, ceiling)
    row_totals = s.sum(axis=1)
    return row_totals.isin(allowed_totals)

def is_one_gene(p):
    '''Test to see if most mutations are due to single gene'''
    counts = hit_matrix.ix[run.gene_sets[p]].sum(1).dropna().order()
Esempio n. 8
0
from Reports.Figures import pathway_plot
from Processing.Helpers import merge_redundant



# Command-line arguments: report directory, cancer type and data type.
report_path = sys.argv[1]
cancer_type = sys.argv[2]
data_type = sys.argv[3]
# Drop the first three characters of the data-type argument (presumably a
# 'CN_' prefix, e.g. 'CN_broad' -> 'broad' -- confirm against the caller).
data_type = data_type[3:]

# Load in run and CN data.
# The file is opened in a `with` block so the handle is closed as soon as
# unpickling finishes instead of being left to the garbage collector.
# NOTE: pickle can execute arbitrary code while loading; only point this at
# RunObject.p files written by a trusted pipeline run.
with open(report_path + '/RunObject.p', 'rb') as run_file:
    run = pickle.load(run_file)
cancer = run.load_cancer(cancer_type)

# Broad data needs no further processing: the table itself is saved as the
# feature matrix and the script stops here.
if data_type == 'broad':
    data = Dataset(cancer, run, 'CN_broad')
    data.features = data.df
    data.save()
    sys.exit(0)
    
data = Dataset(cancer, run, 'CN')
# Suffix the dataset path with the data type.
data.path = '_'.join([data.path, data_type])

# Choose the value that counts as a "hit" for this data type.
if data_type == 'deletion':
    data.hit_val = -2
elif data_type == 'amplification':
    data.hit_val = 2
elif data_type == 'amplification_low':
    # Recode 1 as 2 so both values match the hit value of 2.
    data.df = data.df.replace(1,2)
    data.hit_val = 2