def read_cnv_data(self):
    """
    Parse the copy number variation (CNV) patient data file.

    Each row supplies a gene id in the 'Entrez Gene ID' column; every other
    column name is used as a patient id and its cell value classifies the
    gene for that patient: '0' cells are skipped, '1' cells are recorded
    under 'loss' and any other value is recorded under 'gain'.

    Returns
    -------
    mapping of patient to mutations by entrez ids: an OrderedDict sorted by
    patient id whose values look like {'loss': [...], 'gain': [...]}
    """
    # Real Data #
    # process Copy Number Variation data
    gene_column = 'Entrez Gene ID'
    by_patient = {}
    data_path = config.get_safe_data_file(self.args.cnv_patient_data)
    with open(data_path) as cnv_file:
        for record in csv.DictReader(cnv_file):
            gene_id = record[gene_column]
            for column, call in record.items():
                # the gene id column is not a patient, and '0' cells
                # contribute nothing
                if column != gene_column and call != '0':
                    changes = by_patient.setdefault(
                        column, {'loss': [], 'gain': []})
                    bucket = 'loss' if call == '1' else 'gain'
                    changes[bucket].append(gene_id)
    return collections.OrderedDict(sorted(by_patient.items()))
def read_som_data(self):
    """
    Read the patient data file named by ``self.args.som_patient_data``.

    Presumably this is the somatic mutation data (judging by the ``som``
    prefix) -- confirm against the argument parser.

    Returns
    -------
    OrderedDict sorted by patient id, mapping each 'Patient ID' value to the
    set of 'Entrez Gene ID' values that appear with it in the file
    """
    # Real Data #
    # process the ``som`` patient data
    patients = {}
    # Read the path from self.args, as read_cnv_data does, rather than
    # depending on a module-level ``args`` global being defined.
    data_path = config.get_safe_data_file(self.args.som_patient_data)
    with open(data_path) as csv_file:
        for row in csv.DictReader(csv_file):
            pat_id = row['Patient ID']
            ent_id = row['Entrez Gene ID']
            # a set per patient, so repeated (patient, gene) rows collapse
            patients.setdefault(pat_id, set()).add(ent_id)
    return collections.OrderedDict(sorted(patients.items()))
def read_data(self):
    """
    Read the patient data file named by ``self.args.patient_data``.

    Returns
    -------
    dict mapping each 'Patient ID' value to the set of 'Entrez Gene ID'
    values that appear with it in the file; unlike the other readers the
    result is a plain dict and is not sorted by patient id
    """
    # Real Data #
    # process RNA-seq expression data
    # NOTE(review): the description above is carried over from the original
    # comment -- confirm it matches what ``patient_data`` contains.
    patients = {}
    # Read the path from self.args, as read_cnv_data does, rather than
    # depending on a module-level ``args`` global being defined.
    data_path = config.get_safe_data_file(self.args.patient_data)
    with open(data_path) as csvfile:
        for row in csv.DictReader(csvfile):
            pat_id = row['Patient ID']
            ent_id = row['Entrez Gene ID']
            # a set per patient, so repeated (patient, gene) rows collapse
            patients.setdefault(pat_id, set()).add(ent_id)
    return patients