def main():
    """Write missense, nonsense, and combined mutation tables as TSV files.

    The tab-separated table at ``args.input`` is passed to
    ``extract_mutations_of_type`` three times: once with
    ``['Nonsense_Mutation']``, once with ``['Missense_Mutation']`` and once
    with both labels. Each result is written tab-separated, with header and
    index, to ``args.missense_output``, ``args.nonsense_output`` and
    ``args.combined_output`` respectively.
    """
    args = parse_args()
    mutations = pd.read_csv(args.input, header=0, sep='\t')
    ccle = CCLE_Info(args.ccle_metadata)
    cell_lines = CCLE_Info.read_cell_lines(args.cell_lines)

    def select(mutation_types):
        # All three extractions share the same table, metadata and cell lines.
        return extract_mutations_of_type(mutation_types, mutations, ccle, cell_lines)

    nonsense = select(['Nonsense_Mutation'])
    missense = select(['Missense_Mutation'])
    combined = select(['Missense_Mutation', 'Nonsense_Mutation'])

    # Save the dataframes, all with the same TSV layout.
    tsv_options = {'sep': '\t', 'header': True, 'index': True}
    missense.to_csv(args.missense_output, **tsv_options)
    nonsense.to_csv(args.nonsense_output, **tsv_options)
    combined.to_csv(args.combined_output, **tsv_options)
def restrict_to_cell_lines(df, cell_lines):
    """Return a transposed table restricted to the requested cell lines.

    Every column label of ``df`` is replaced by the first element of
    ``CCLE_Info.split_ccle_name(label)``. The 'Description' column and the
    requested cell-line columns are then selected, 'Description' is renamed
    to 'Gene' and made the index, and the result is transposed.

    Args:
        df: DataFrame containing a 'Description' column plus the cell-line
            columns. It is left unmodified.
        cell_lines: Iterable of (shortened) column labels to keep, in the
            order they should appear as rows of the result.

    Returns:
        DataFrame with one row per requested cell line and one column per
        'Gene' value.

    NOTE(review): this relies on ``split_ccle_name('Description')[0]``
    being 'Description' -- confirm against ``CCLE_Info``.
    """
    short_names = [CCLE_Info.split_ccle_name(label)[0] for label in df.columns]

    # Relabel a shallow copy: assigning to ``df.columns`` directly would
    # rename the caller's frame as a hidden side effect. A shallow copy
    # shares the underlying data, so no values are duplicated here.
    relabelled = df.copy(deep=False)
    relabelled.columns = short_names

    # list() lets callers pass any iterable of names, not only a list.
    wanted = ['Description'] + list(cell_lines)
    selected = relabelled[wanted]
    by_gene = selected.rename(columns={'Description': 'Gene'}).set_index('Gene')
    return by_gene.T
def expression_cell(fp):
    """Return the set of cell-line names found in an expression table header.

    The first two lines of the tab-separated file are skipped and the next
    line is used as the header. The 'Name' and 'Description' columns are
    dropped, and each remaining column label is reduced to the first element
    of ``CCLE_Info.split_ccle_name(label)``.

    Args:
        fp: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        Set of shortened cell-line names.

    Raises:
        ValueError: If the header has no 'Name' or no 'Description' column.
    """
    # Only the column labels are needed, so parse the header row alone
    # (nrows=0) instead of loading the whole expression matrix.
    header = pd.read_csv(fp, sep='\t', header=0, skiprows=range(0, 2), nrows=0)
    labels = list(header.columns)

    # list.remove raises ValueError when an annotation column is absent,
    # which surfaces an unexpected file layout instead of hiding it.
    for annotation in ('Name', 'Description'):
        labels.remove(annotation)

    return {CCLE_Info.split_ccle_name(label)[0] for label in labels}
def main():
    """Restrict the raw expression table to the chosen cell lines and save it.

    Reads the cell-line list from ``args.cell_lines`` and the tab-separated
    table from ``args.input`` (skipping its first two lines), passes both to
    ``restrict_to_cell_lines`` and writes the result tab-separated to
    ``args.output``.
    """
    args = parse_args()
    wanted_lines = CCLE_Info.read_cell_lines(args.cell_lines)
    # The first two lines of the input precede the header row.
    raw_table = pd.read_csv(args.input, sep='\t', header=0, skiprows=range(2))
    restrict_to_cell_lines(raw_table, wanted_lines).to_csv(args.output, sep='\t')
def main():
    """Write the names common to the drug-response, expression and mutation data.

    The three name collections come from ``drug_response_cell``,
    ``expression_cell`` and ``mutation_cell``. Their intersection is written
    to ``args.output``, one name per line, with no trailing newline.
    """
    args = parse_args()
    ccle = CCLE_Info(args.ccle_info)

    from_drug_response = drug_response_cell(args.drug_response_data)
    from_expression = expression_cell(args.expression_data)
    from_mutation = mutation_cell(args.mutation_data, ccle)

    # Build a fresh set rather than using ``&=``, which would mutate the set
    # returned by the first helper in place.
    shared = set(from_drug_response).intersection(from_expression, from_mutation)

    # Set iteration order can differ between runs; sorting makes the output
    # file reproducible.
    with open(args.output, 'w') as out:
        out.write('\n'.join(sorted(shared)))
def main():
    """Write one drug-response table per requested drug.

    Reads the table at ``args.input``, keeps the 'Cell_Line', 'Tissue',
    'Perturbagen', 'GRinf' and 'GR_AOC' columns (renaming 'Perturbagen' to
    'Drug'), and keeps only rows whose 'Cell_Line' is in the list read from
    ``args.cell_lines``. For each drug in ``args.drugs`` the frame returned
    by ``get_drug_response_df`` is written tab-separated, with header and
    index, to the output path at the same position in ``args.outputs``.

    Raises:
        ValueError: If ``args.drugs`` and ``args.outputs`` differ in length.
    """
    args = parse_args()

    # zip() stops at the shorter list, so a mismatch would silently skip
    # drugs or leave output files unwritten; fail early instead.
    if len(args.drugs) != len(args.outputs):
        raise ValueError(
            'Expected one output per drug, got {} drugs and {} outputs'.format(
                len(args.drugs), len(args.outputs)))

    responses = pd.read_csv(args.input, header=0)
    cell_lines = CCLE_Info.read_cell_lines(args.cell_lines)

    kept_columns = ['Cell_Line', 'Tissue', 'Perturbagen', 'GRinf', 'GR_AOC']
    responses = responses[kept_columns].rename(columns={'Perturbagen': 'Drug'})

    # Restrict to the requested cell lines.
    responses = responses[responses.Cell_Line.isin(cell_lines)]

    print('* Number of drugs:', len(args.drugs))
    print('\tDrugs:', args.drugs)

    for drug, output in zip(args.drugs, args.outputs):
        drug_table = get_drug_response_df(responses, drug, cell_lines)
        drug_table.to_csv(output, sep='\t', header=True, index=True)
def test_CCLE_info(self):
    """Check name splitting and both ID/name lookups of ``CCLE_Info``.

    The metadata file is loaded from a path relative to the current working
    directory.
    """
    info = CCLE_Info('../data/raw/CCLE_metadata.csv')

    # A CCLE name is expected to split into (cell line, tissue).
    name_parts = info.split_ccle_name('NIHOVCAR3_OVARY')
    self.assertEqual(name_parts, ('NIHOVCAR3', 'OVARY'))

    # The two lookups are expected to be inverses for this entry.
    name_from_id = info.broad_id_2_ccle_name('ACH-000001')
    self.assertEqual(name_from_id, 'NIHOVCAR3')

    id_from_name = info.ccle_name_2_broad_id('NIHOVCAR3')
    self.assertEqual(id_from_name, 'ACH-000001')