def test_100genes_main():
    """Check the 'tsg' analysis on the 100-gene fixture for three contexts.

    The analysis is run with context 1 (single nucleotide), 0 (no context)
    and 2 (di-nucleotide).  Each run counts the genes whose
    'inactivating BH q-value' is below 0.1 and requires that count to stay
    under a per-run ceiling.
    """
    settings = {
        'input': os.path.join(file_dir, 'data/100genes.fa'),
        'bed': os.path.join(file_dir, 'data/100genes.bed'),
        'mutations': os.path.join(file_dir, 'data/100genes_mutations.txt'),
        'output': os.path.join(file_dir, 'output/100genes_deleterious_single_nuc_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg',
    }

    # (context, output file, exclusive ceiling on significant genes)
    scenarios = (
        (1, 'output/100genes_deleterious_single_nuc_output.txt', 7),
        (0, 'output/100genes_deleterious_no_context_output.txt', 9),
        (2, 'output/100genes_deleterious_dinuc_output.txt', 7),
    )
    for context, out_file, ceiling in scenarios:
        settings['context'] = context
        settings['output'] = os.path.join(file_dir, out_file)
        outcome = pt.main(settings)
        num_del_sig = np.sum(outcome['inactivating BH q-value'] < .1)
        assert num_del_sig < ceiling, 'Few of the 100 test genes should not be significant ({0})'.format(num_del_sig)
def test_ctnnb1_hotmaps_main():
    """Smoke-test the 'hotmaps1d' analysis on CTNNB1 for several windows.

    The same options dict is passed to ``rt.main`` three times; only the
    'window' entry changes between runs.  Nothing is asserted on the
    results, so the test passes as long as each run completes.
    """
    settings = {
        'input': os.path.join(file_dir, 'data/CTNNB1.fa'),
        'bed': os.path.join(file_dir, 'data/CTNNB1.bed'),
        'mutations': os.path.join(file_dir, 'data/CTNNB1_mutations.txt'),
        'output': os.path.join(file_dir, 'output/CTNNB1_output_hotmaps.txt'),
        'context': 1.5,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'unique': 0,
        'seed': None,
        'window': '3',
        'report_index': True,
        'null_distr_dir': os.path.join(file_dir, 'output/hotmaps1d_null'),
        'kind': 'hotmaps1d',
    }

    # window sizes are supplied as strings, exactly as the options expect here
    for window in ('3', '6', '9'):
        settings['window'] = window
        result = rt.main(settings)
def test_ctnnb1_main():
    """Check that CTNNB1 gets a very low entropy p-value in every context.

    The 'oncogene' analysis is run with context 1 (single nucleotide),
    2 (di-nucleotide) and 0 (no context).  For each run the
    'entropy p-value' of the first result row must be below 0.001.
    Only 'context' is changed between runs.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/CTNNB1.fa'),
        'bed': os.path.join(file_dir, 'data/CTNNB1.bed'),
        'mutations': os.path.join(file_dir, 'data/CTNNB1_mutations.txt'),
        'output': os.path.join(file_dir, 'output/CTNNB1_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'score_dir': os.path.join(file_dir, 'data/scores'),
        'processes': 0,
        'num_iterations': 10000,
        'stop_criteria': 100,
        'recurrent_pseudo_count': 0,
        'unique': 0,
        'seed': None,
        'kind': 'oncogene',
    }
    p_val_col = 'entropy p-value'

    # single nucleotide, di-nucleotide, then no context
    for context in (1, 2, 0):
        opts['context'] = context
        result = pt.main(opts)
        # Positional lookup with .iloc: DataFrame.ix no longer exists in
        # current pandas releases.
        p_value = result[p_val_col].iloc[0]
        # Report the value that was actually compared, so a failure shows
        # the offending p-value instead of raising while building the message.
        assert p_value < 0.001, 'CTNNB1 should have a very low p-value ({0}>.001)'.format(p_value)
def test_tp53_main():
    """Check that TP53 gets a very low inactivating p-value in every context.

    The 'tsg' analysis is run with context 1 (single nucleotide),
    2 (di-nucleotide) and 0 (no context).  For each run the
    'inactivating p-value' of the first result row must be below 0.001.
    Only 'context' is changed between runs.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/tp53.fa'),
        'bed': os.path.join(file_dir, 'data/tp53.bed'),
        'mutations': os.path.join(file_dir, 'data/tp53_mutations.txt'),
        'output': os.path.join(file_dir, 'output/tp53_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 10000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg',
    }
    p_val_col = 'inactivating p-value'

    # single nucleotide, di-nucleotide, then no context
    for context in (1, 2, 0):
        opts['context'] = context
        result = pt.main(opts)
        # Positional lookup with .iloc: DataFrame.ix no longer exists in
        # current pandas releases.
        p_value = result[p_val_col].iloc[0]
        # Report the value that was actually compared, so a failure shows
        # the offending p-value instead of raising while building the message.
        assert p_value < 0.001, 'TP53 should have a very low p-value ({0}>.001)'.format(p_value)
def test_100genes_main():
    """Check the 'tsg' analysis on the 100-gene fixture for three contexts.

    Runs with context 1 (single nucleotide), 0 (no context) and
    2 (di-nucleotide).  After each run the number of genes with an
    'inactivating BH q-value' below 0.1 must stay under a small ceiling.
    """
    failure_msg = 'Few of the 100 test genes should not be significant ({0})'

    def count_significant(options):
        # number of genes whose inactivating q-value falls below 0.1
        table = pt.main(options)
        return np.sum(table['inactivating BH q-value'] < .1)

    options = {
        'input': os.path.join(file_dir, 'data/100genes.fa'),
        'bed': os.path.join(file_dir, 'data/100genes.bed'),
        'mutations': os.path.join(file_dir, 'data/100genes_mutations.txt'),
        'output': os.path.join(
            file_dir, 'output/100genes_deleterious_single_nuc_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg',
    }

    # single nucleotide context
    num_del_sig = count_significant(options)
    assert num_del_sig < 7, failure_msg.format(num_del_sig)

    # no context case
    options['context'] = 0
    options['output'] = os.path.join(
        file_dir, 'output/100genes_deleterious_no_context_output.txt')
    num_del_sig = count_significant(options)
    assert num_del_sig < 9, failure_msg.format(num_del_sig)

    # di-nucleotide context
    options['context'] = 2
    options['output'] = os.path.join(
        file_dir, 'output/100genes_deleterious_dinuc_output.txt')
    num_del_sig = count_significant(options)
    assert num_del_sig < 7, failure_msg.format(num_del_sig)
def test_100genes_main():
    """Check the 'hotmaps1d' analysis on the 100-gene fixture.

    A single run with context 1 and window 3 is performed; fewer than nine
    genes may have a 'q-value' below 0.01.
    """
    def fixture(relative_path):
        # resolve a path relative to the test directory
        return os.path.join(file_dir, relative_path)

    settings = {
        'input': fixture('data/100genes.fa'),
        'bed': fixture('data/100genes.bed'),
        'mutations': fixture('data/100genes_mutations.txt'),
        'output': fixture('output/100genes_hotmaps_single_nuc_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'unique': False,
        'seed': None,
        'window': 3,
        'kind': 'hotmaps1d',
    }

    # single nucleotide context
    outcome = rt.main(settings)
    below_cutoff = outcome['q-value'] < .01
    num_sig = np.sum(below_cutoff)
    assert num_sig < 9, 'Few of the 100 test genes should not be significant ({0})'.format(num_sig)
def main(opts, mutation_df=None, frameshift_df=None):
    """Run the randomization-based test and post-process its result table.

    Parameters
    ----------
    opts : dict
        Options.  This function reads 'output', 'kind' and, for
        kind 'oncogene', 'num_iterations'; the dict is also handed to
        ``rt.main``.  'output' is blanked only for the duration of that
        call and restored afterwards.
    mutation_df : optional
        Forwarded unchanged to ``rt.main``.
    frameshift_df : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The result table, indexed by its 'gene' column (the column is kept).

    Raises
    ------
    ValueError
        If ``opts['kind']`` is not one of the supported kinds.
    """
    # p-value / q-value column names reported for each kind of test
    columns_by_kind = {
        'tsg': ('inactivating p-value', 'inactivating BH q-value'),
        'effect': ('entropy-on-effect p-value',
                   'entropy-on-effect BH q-value'),
        'oncogene': ('entropy p-value', 'entropy BH q-value'),
        'protein': ('normalized graph-smoothed position entropy p-value',
                    'normalized graph-smoothed position entropy BH q-value'),
        'hotmaps1d': ('p-value', 'q-value'),
    }
    kind = opts['kind']
    if kind not in columns_by_kind:
        # fail before the expensive run rather than with an unbound local after it
        raise ValueError(
            'Unsupported kind {0!r}; expected one of {1}'.format(
                kind, sorted(columns_by_kind)))
    p_val_col, q_val_col = columns_by_kind[kind]

    # get output file; 'output' is blanked while rt.main runs (presumably so
    # that rt.main does not write its own file) and then restored so the
    # caller's dict is not left modified
    myoutput_path = opts['output']
    opts['output'] = ''
    try:
        # perform randomization-based test
        result_df = rt.main(opts, mutation_df)
    finally:
        opts['output'] = myoutput_path

    # clean up p-values for combined p-value calculation
    result_df[p_val_col] = result_df[p_val_col].fillna(1)
    result_df[q_val_col] = result_df[q_val_col].fillna(1)

    if kind == 'tsg':
        # drop genes that never occur
        # NOTE(review): the original nested check also named 'effect', but it
        # sat inside the 'tsg' branch and could never apply to 'effect';
        # behavior for 'effect' is left unchanged.
        no_ssvs = (result_df['Total SNV Mutations'] == 0)
        result_df = result_df[~no_ssvs]
        result_df = result_df.sort_values(by=p_val_col)
    elif kind == 'oncogene':
        # get FDR; copy the filtered rows so the column assignments below
        # act on an independent frame rather than a slice of the original
        result_df = result_df[result_df['Total Mutations'] > 0].copy()
        result_df['entropy BH q-value'] = mypval.bh_fdr(
            result_df['entropy p-value'])

        # combine p-values; zeros are replaced by 1/num_iterations in
        # temporary columns before being combined
        smallest_p = 1. / opts['num_iterations']
        result_df['tmp entropy p-value'] = result_df['entropy p-value']
        result_df['tmp vest p-value'] = result_df['vest p-value']
        result_df.loc[result_df['entropy p-value'] == 0,
                      'tmp entropy p-value'] = smallest_p
        result_df.loc[result_df['vest p-value'] == 0,
                      'tmp vest p-value'] = smallest_p
        result_df['combined p-value'] = result_df[[
            'tmp entropy p-value', 'tmp vest p-value'
        ]].apply(mypval.fishers_method, axis=1)
        result_df['combined BH q-value'] = mypval.bh_fdr(
            result_df['combined p-value'])
        del result_df['tmp vest p-value']
        del result_df['tmp entropy p-value']

    if myoutput_path:
        # write output if specified
        result_df.to_csv(myoutput_path, sep='\t', index=False)

    result_df = result_df.set_index('gene', drop=False)

    return result_df
def main(opts, mutation_df=None, frameshift_df=None):
    """Run the randomization-based test and post-process its result table.

    Parameters
    ----------
    opts : dict
        Options.  This function reads 'output', 'kind' and, for
        kind 'oncogene', 'num_iterations'; the dict is also handed to
        ``rt.main``.  'output' is blanked only for the duration of that
        call and restored afterwards.
    mutation_df : optional
        Forwarded unchanged to ``rt.main``.
    frameshift_df : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The result table, indexed by its 'gene' column (the column is kept).

    Raises
    ------
    ValueError
        If ``opts['kind']`` is not one of the supported kinds.
    """
    # p-value / q-value column names reported for each kind of test
    columns_by_kind = {
        'tsg': ('inactivating p-value', 'inactivating BH q-value'),
        'effect': ('entropy-on-effect p-value',
                   'entropy-on-effect BH q-value'),
        'oncogene': ('entropy p-value', 'entropy BH q-value'),
        'protein': ('normalized graph-smoothed position entropy p-value',
                    'normalized graph-smoothed position entropy BH q-value'),
        'hotmaps1d': ('p-value', 'q-value'),
    }
    kind = opts['kind']
    try:
        p_val_col, q_val_col = columns_by_kind[kind]
    except KeyError:
        # fail before the expensive run rather than with an unbound local after it
        raise ValueError(
            'Unsupported kind {0!r}; expected one of {1}'.format(
                kind, sorted(columns_by_kind)))

    # get output file; 'output' is blanked while rt.main runs (presumably so
    # that rt.main does not write its own file) and then restored so the
    # caller's dict is not left modified
    myoutput_path = opts['output']
    opts['output'] = ''
    try:
        # perform randomization-based test
        result_df = rt.main(opts, mutation_df)
    finally:
        opts['output'] = myoutput_path

    # clean up p-values for combined p-value calculation
    result_df[p_val_col] = result_df[p_val_col].fillna(1)
    result_df[q_val_col] = result_df[q_val_col].fillna(1)

    if kind == 'tsg':
        # drop genes that never occur
        # NOTE(review): the original nested check also named 'effect', but it
        # sat inside the 'tsg' branch and could never apply to 'effect';
        # behavior for 'effect' is left unchanged.
        no_ssvs = (result_df['Total SNV Mutations'] == 0)
        result_df = result_df[~no_ssvs]
        result_df = result_df.sort_values(by=p_val_col)
    elif kind == 'oncogene':
        # get FDR; copy the filtered rows so the column assignments below
        # act on an independent frame rather than a slice of the original
        result_df = result_df[result_df['Total Mutations'] > 0].copy()
        result_df['entropy BH q-value'] = mypval.bh_fdr(result_df['entropy p-value'])

        # combine p-values; zeros are replaced by 1/num_iterations in
        # temporary columns before being combined
        smallest_p = 1. / opts['num_iterations']
        result_df['tmp entropy p-value'] = result_df['entropy p-value']
        result_df['tmp vest p-value'] = result_df['vest p-value']
        result_df.loc[result_df['entropy p-value'] == 0, 'tmp entropy p-value'] = smallest_p
        result_df.loc[result_df['vest p-value'] == 0, 'tmp vest p-value'] = smallest_p
        tmp_cols = ['tmp entropy p-value', 'tmp vest p-value']
        result_df['combined p-value'] = result_df[tmp_cols].apply(mypval.fishers_method, axis=1)
        result_df['combined BH q-value'] = mypval.bh_fdr(result_df['combined p-value'])
        del result_df['tmp vest p-value']
        del result_df['tmp entropy p-value']

    if myoutput_path:
        # write output if specified
        result_df.to_csv(myoutput_path, sep='\t', index=False)

    result_df = result_df.set_index('gene', drop=False)

    return result_df
def test_ctnnb1_main():
    """Check that CTNNB1 gets a very low entropy p-value in every context.

    The 'oncogene' analysis is run with context 1 (single nucleotide),
    2 (di-nucleotide) and 0 (no context).  For each run the
    'entropy p-value' of the first result row must be below 0.001.
    Only 'context' is changed between runs.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/CTNNB1.fa'),
        'bed': os.path.join(file_dir, 'data/CTNNB1.bed'),
        'mutations': os.path.join(file_dir, 'data/CTNNB1_mutations.txt'),
        'output': os.path.join(file_dir, 'output/CTNNB1_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'score_dir': os.path.join(file_dir, 'data/scores'),
        'processes': 0,
        'num_iterations': 10000,
        'stop_criteria': 100,
        'recurrent_pseudo_count': 0,
        'unique': 0,
        'seed': None,
        'kind': 'oncogene',
    }
    failure_msg = 'CTNNB1 should have a very low p-value ({0}>.001)'

    # single nucleotide, di-nucleotide, then no context
    for context in (1, 2, 0):
        opts['context'] = context
        result = pt.main(opts)
        # Positional lookup with .iloc: DataFrame.ix no longer exists in
        # current pandas releases.
        observed = result['entropy p-value'].iloc[0]
        # The message reports the compared value, so a failure shows the
        # offending p-value instead of raising while building the message.
        assert observed < 0.001, failure_msg.format(observed)
def test_tp53_main():
    """Check that TP53 gets a very low inactivating p-value in every context.

    The 'tsg' analysis is run with context 1 (single nucleotide),
    2 (di-nucleotide) and 0 (no context).  For each run the
    'inactivating p-value' of the first result row must be below 0.001.
    Only 'context' is changed between runs.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/tp53.fa'),
        'bed': os.path.join(file_dir, 'data/tp53.bed'),
        'mutations': os.path.join(file_dir, 'data/tp53_mutations.txt'),
        'output': os.path.join(file_dir, 'output/tp53_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 10000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg',
    }
    failure_msg = 'TP53 should have a very low p-value ({0}>.001)'

    # single nucleotide, di-nucleotide, then no context
    for context in (1, 2, 0):
        opts['context'] = context
        result = pt.main(opts)
        # Positional lookup with .iloc: DataFrame.ix no longer exists in
        # current pandas releases.
        observed = result['inactivating p-value'].iloc[0]
        # The message reports the compared value, so a failure shows the
        # offending p-value instead of raising while building the message.
        assert observed < 0.001, failure_msg.format(observed)
def test_100genes_main():
    """Check the 'oncogene' analysis on the 100-gene fixture for three contexts.

    The analysis is run with context 1 (single nucleotide), 0 (no context)
    and 2 (di-nucleotide).  In every run fewer than nine genes may have an
    'entropy BH q-value' below 0.1.
    """
    settings = {
        'input': os.path.join(file_dir, 'data/100genes.fa'),
        'bed': os.path.join(file_dir, 'data/100genes.bed'),
        'mutations': os.path.join(file_dir, 'data/100genes_mutations.txt'),
        'output': os.path.join(file_dir, 'output/100genes_position_single_nuc_output.txt'),
        'context': 1,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'score_dir': os.path.join(file_dir, 'data/scores'),
        'recurrent_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'oncogene',
    }

    # (context, output file) for each run, in execution order
    scenarios = (
        (1, 'output/100genes_position_single_nuc_output.txt'),
        (0, 'output/100genes_position_no_context_output.txt'),
        (2, 'output/100genes_position_dinuc_output.txt'),
    )
    for context, out_file in scenarios:
        settings['context'] = context
        settings['output'] = os.path.join(file_dir, out_file)
        outcome = pt.main(settings)
        num_ent_sig = np.sum(outcome['entropy BH q-value'] < .1)
        assert num_ent_sig < 9, 'Few of the 100 test genes should not be significant ({0})'.format(num_ent_sig)
def test_100genes_main():
    """Check the 'oncogene' analysis on the 100-gene fixture for three contexts.

    Runs with context 1 (single nucleotide), 0 (no context) and
    2 (di-nucleotide).  After each run fewer than nine genes may have an
    'entropy BH q-value' below 0.1.
    """
    failure_msg = 'Few of the 100 test genes should not be significant ({0})'

    def count_significant(options):
        # number of genes whose entropy q-value falls below 0.1
        table = pt.main(options)
        return np.sum(table['entropy BH q-value'] < .1)

    options = {
        'input': os.path.join(file_dir, 'data/100genes.fa'),
        'bed': os.path.join(file_dir, 'data/100genes.bed'),
        'mutations': os.path.join(file_dir, 'data/100genes_mutations.txt'),
        'output': os.path.join(
            file_dir, 'output/100genes_position_single_nuc_output.txt'),
        'context': 1,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'score_dir': os.path.join(file_dir, 'data/scores'),
        'recurrent_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'oncogene',
    }

    # single nucleotide context
    num_ent_sig = count_significant(options)
    assert num_ent_sig < 9, failure_msg.format(num_ent_sig)

    # no context case
    options['context'] = 0
    options['output'] = os.path.join(
        file_dir, 'output/100genes_position_no_context_output.txt')
    num_ent_sig = count_significant(options)
    assert num_ent_sig < 9, failure_msg.format(num_ent_sig)

    # di-nucleotide context
    options['context'] = 2
    options['output'] = os.path.join(
        file_dir, 'output/100genes_position_dinuc_output.txt')
    num_ent_sig = count_significant(options)
    assert num_ent_sig < 9, failure_msg.format(num_ent_sig)