def main():
    """Split a mutation table by mutation type and write each subset as TSV.

    Reads the tab-separated table at ``args.input``, builds a ``CCLE_Info``
    from ``args.ccle_metadata`` and loads the cell lines named by
    ``args.cell_lines``.  ``extract_mutations_of_type`` is then called three
    times (nonsense only, missense only, both together) and the results are
    written to ``args.missense_output``, ``args.nonsense_output`` and
    ``args.combined_output``.
    """
    args = parse_args()
    mutations = pd.read_csv(args.input, header=0, sep='\t')
    ccle = CCLE_Info(args.ccle_metadata)
    cell_lines = CCLE_Info.read_cell_lines(args.cell_lines)

    def extract(*mutation_types):
        # Every extraction shares the same table, metadata and cell lines.
        return extract_mutations_of_type(
            list(mutation_types), mutations, ccle, cell_lines)

    nonsense_only = extract('Nonsense_Mutation')
    missense_only = extract('Missense_Mutation')
    combined = extract('Missense_Mutation', 'Nonsense_Mutation')

    # All three tables are saved with identical TSV settings.
    tsv_options = dict(sep='\t', header=True, index=True)
    missense_only.to_csv(args.missense_output, **tsv_options)
    nonsense_only.to_csv(args.nonsense_output, **tsv_options)
    combined.to_csv(args.combined_output, **tsv_options)
def restrict_to_cell_lines(df, cell_lines):
    """Return ``df`` restricted to ``cell_lines``, transposed and indexed by gene.

    Every column label is passed through ``CCLE_Info.split_ccle_name`` and
    replaced by the first element of the result.  The ``Description`` column
    and the requested cell-line columns are then selected, ``Description`` is
    renamed to ``Gene`` and made the index, and the frame is transposed.

    The relabelling is applied to a shallow copy, so the caller's ``df`` keeps
    its original column labels (the underlying data is not duplicated).

    Args:
        df: DataFrame holding a ``Description`` column and the sample columns.
            NOTE(review): assumes ``split_ccle_name`` leaves the label
            ``'Description'`` unchanged -- confirm against CCLE_Info.
        cell_lines: Iterable of relabelled column names to keep, in the order
            they should appear as rows of the result.

    Returns:
        The selected columns with ``Gene`` as the column axis and the
        requested cell lines as the row axis.
    """
    relabelled = df.copy(deep=False)
    relabelled.columns = [
        CCLE_Info.split_ccle_name(label)[0] for label in df.columns
    ]
    # list(...) lets callers pass any iterable, not only a list.
    selected = relabelled[['Description'] + list(cell_lines)]
    by_gene = selected.rename(columns={'Description': 'Gene'}).set_index('Gene')
    return by_gene.T
def expression_cell(fp):
    """Return the set of cell-line names found in an expression file's header.

    The file is read as tab-separated text with its first two lines skipped,
    so the third line supplies the column names.  The ``Name`` and
    ``Description`` columns are dropped and each remaining column label is
    reduced to the first element returned by ``CCLE_Info.split_ccle_name``.

    Args:
        fp: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        set: The distinct names derived from the sample columns.

    Raises:
        ValueError: If the header has no ``Name`` or no ``Description`` column.
    """
    # Only the column labels are needed, so nrows=0 avoids parsing the
    # (potentially very large) expression matrix itself.
    header = pd.read_csv(fp, sep='\t', header=0, skiprows=range(0, 2), nrows=0)
    sample_columns = list(header.columns)
    for annotation in ('Name', 'Description'):
        sample_columns.remove(annotation)
    return {CCLE_Info.split_ccle_name(column)[0] for column in sample_columns}
def main():
    """Restrict an RNA-seq table to the requested cell lines and save it.

    Loads the cell lines named by ``args.cell_lines``, reads the tab-separated
    table at ``args.input`` (skipping its first two lines), passes both to
    ``restrict_to_cell_lines`` and writes the result to ``args.output`` as
    tab-separated text.
    """
    args = parse_args()
    wanted = CCLE_Info.read_cell_lines(args.cell_lines)

    # The first two lines of the input come before the header row.
    read_options = dict(sep='\t', header=0, skiprows=range(0, 2))
    expression = pd.read_csv(args.input, **read_options)

    restrict_to_cell_lines(expression, wanted).to_csv(args.output, sep='\t')
def main():
    """Write the names common to the drug-response, expression and mutation data.

    Builds a ``CCLE_Info`` from ``args.ccle_info``, collects one set of names
    from each of ``drug_response_cell``, ``expression_cell`` and
    ``mutation_cell``, intersects the three sets and writes the result to
    ``args.output``, one name per line, in sorted order.
    """
    args = parse_args()
    ccle = CCLE_Info(args.ccle_info)
    dr_genes = drug_response_cell(args.drug_response_data)
    e_genes = expression_cell(args.expression_data)
    m_genes = mutation_cell(args.mutation_data, ccle)

    # Build a new set rather than aliasing dr_genes and shrinking it in place.
    shared = dr_genes & e_genes & m_genes

    # Sets iterate in arbitrary order; sorting makes the output file
    # reproducible from one run to the next.
    with open(args.output, 'w') as OUT:
        OUT.write('\n'.join(sorted(shared)))
# Example #6
# 0
def main():
    """Write one drug-response table per requested drug.

    Reads the CSV at ``args.input``, keeps the ``Cell_Line``, ``Tissue``,
    ``Perturbagen``, ``GRinf`` and ``GR_AOC`` columns (renaming
    ``Perturbagen`` to ``Drug``) and drops rows whose ``Cell_Line`` is not
    among the cell lines named by ``args.cell_lines``.  Each drug in
    ``args.drugs`` is paired positionally with a path in ``args.outputs``;
    the frame returned by ``get_drug_response_df`` for that drug is written
    there as tab-separated text.
    """
    args = parse_args()
    raw = pd.read_csv(args.input, header=0)
    cell_lines = CCLE_Info.read_cell_lines(args.cell_lines)

    kept_columns = ['Cell_Line', 'Tissue', 'Perturbagen', 'GRinf', 'GR_AOC']
    responses = raw[kept_columns].rename(columns={'Perturbagen': 'Drug'})

    # Keep only the rows measured on one of the requested cell lines.
    in_scope = responses['Cell_Line'].isin(cell_lines)
    responses = responses[in_scope]

    drugs = args.drugs
    print('* Number of drugs:', len(drugs))
    print('\tDrugs:', drugs)

    for drug, output in zip(drugs, args.outputs):
        per_drug = get_drug_response_df(responses, drug, cell_lines)
        per_drug.to_csv(output, sep='\t', header=True, index=True)
# Example #7
# 0
 def test_CCLE_info(self):
     """Check name splitting and both directions of the Broad-ID/name lookup."""
     info = CCLE_Info('../data/raw/CCLE_metadata.csv')
     name, tissue, broad_id = 'NIHOVCAR3', 'OVARY', 'ACH-000001'

     # A full CCLE name splits into (cell line, tissue).
     self.assertEqual(info.split_ccle_name('NIHOVCAR3_OVARY'), (name, tissue))
     # The two lookups are expected to be inverses for this entry.
     self.assertEqual(info.broad_id_2_ccle_name(broad_id), name)
     self.assertEqual(info.ccle_name_2_broad_id(name), broad_id)