def test_100genes_main():
    """Run the 'tsg' analysis on the 100-gene data set for three contexts.

    For every context setting ``pt.main`` is executed with its own output
    path, and the number of genes whose 'inactivating BH q-value' is below
    0.1 must stay under a per-context ceiling.
    """
    def in_test_dir(rel_path):
        # resolve a path relative to the directory holding the test data
        return os.path.join(file_dir, rel_path)

    opts = {
        'input': in_test_dir('data/100genes.fa'),
        'bed': in_test_dir('data/100genes.bed'),
        'mutations': in_test_dir('data/100genes_mutations.txt'),
        'output': in_test_dir('output/100genes_deleterious_single_nuc_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg',
    }

    # (context, output file, exclusive upper bound on significant genes)
    scenarios = [
        (1, 'output/100genes_deleterious_single_nuc_output.txt', 7),  # single nucleotide
        (0, 'output/100genes_deleterious_no_context_output.txt', 9),  # no context
        (2, 'output/100genes_deleterious_dinuc_output.txt', 7),       # di-nucleotide
    ]
    for context, output_name, ceiling in scenarios:
        opts['context'] = context
        opts['output'] = in_test_dir(output_name)
        result = pt.main(opts)
        num_del_sig = np.sum(result['inactivating BH q-value'] < .1)
        assert num_del_sig < ceiling, 'Few of the 100 test genes should not be significant ({0})'.format(num_del_sig)
def test_ctnnb1_hotmaps_main():
    """Smoke-test the 'hotmaps1d' analysis on CTNNB1 for three window sizes.

    ``rt.main`` is called once per window value ('3', '6' and '9') while the
    context stays at 1.5. The returned value is not inspected, so the test
    passes as long as none of the calls raises.
    """
    def in_test_dir(rel_path):
        # resolve a path relative to the directory holding the test data
        return os.path.join(file_dir, rel_path)

    opts = {
        'input': in_test_dir('data/CTNNB1.fa'),
        'bed': in_test_dir('data/CTNNB1.bed'),
        'mutations': in_test_dir('data/CTNNB1_mutations.txt'),
        'output': in_test_dir('output/CTNNB1_output_hotmaps.txt'),
        'context': 1.5,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'unique': 0,
        'seed': None,
        'window': '3',
        'report_index': True,
        'null_distr_dir': in_test_dir('output/hotmaps1d_null'),
        'kind': 'hotmaps1d',
    }

    # only the window size changes between the three runs
    for window in ('3', '6', '9'):
        opts['window'] = window
        rt.main(opts)
def test_ctnnb1_main():
    """Check that CTNNB1 gets a very low entropy p-value in 'oncogene' mode.

    ``pt.main`` is run with context 1 (single nucleotide), 2 (di-nucleotide)
    and 0 (no context); each time the 'entropy p-value' of the first result
    row must be below 0.001.
    """
    opts = {'input': os.path.join(file_dir, 'data/CTNNB1.fa'),
            'bed': os.path.join(file_dir, 'data/CTNNB1.bed'),
            'mutations': os.path.join(file_dir, 'data/CTNNB1_mutations.txt'),
            'output': os.path.join(file_dir, 'output/CTNNB1_output.txt'),
            'context': 1,
            'use_unmapped': False,
            'tsg_score': .1,
            'recurrent': 3,
            'fraction': .02,
            'score_dir': os.path.join(file_dir, 'data/scores'),
            'processes': 0,
            'num_iterations': 10000,
            'stop_criteria': 100,
            'recurrent_pseudo_count': 0,
            'unique': 0,
            'seed': None,
            'kind': 'oncogene'}

    # single nucleotide, di-nucleotide, then no context
    for context in (1, 2, 0):
        opts['context'] = context
        result = pt.main(opts)
        # `.ix` was removed from pandas; read the first row by position.
        # NOTE(review): assumes the first row is CTNNB1 - confirm.
        p_value = result['entropy p-value'].iloc[0]
        # report the p-value itself; the old `result[0][2]` lookup would
        # fail with a KeyError while building the failure message
        assert p_value < 0.001, 'CTNNB1 should have a very low p-value ({0}>.001)'.format(p_value)
def test_tp53_main():
    """Check that TP53 gets a very low inactivating p-value in 'tsg' mode.

    ``pt.main`` is run with context 1 (single nucleotide), 2 (di-nucleotide)
    and 0 (no context); each time the 'inactivating p-value' of the first
    result row must be below 0.001.
    """
    opts = {'input': os.path.join(file_dir, 'data/tp53.fa'),
            'bed': os.path.join(file_dir, 'data/tp53.bed'),
            'mutations': os.path.join(file_dir, 'data/tp53_mutations.txt'),
            'output': os.path.join(file_dir, 'output/tp53_output.txt'),
            'context': 1,
            'use_unmapped': False,
            'deleterious': 5,
            'processes': 0,
            'num_iterations': 10000,
            'stop_criteria': 100,
            'deleterious_pseudo_count': 0,
            'unique': False,
            'seed': None,
            'kind': 'tsg'}

    # single nucleotide, di-nucleotide, then no context
    for context in (1, 2, 0):
        opts['context'] = context
        result = pt.main(opts)
        # `.ix` was removed from pandas; read the first row by position.
        # NOTE(review): assumes the first row is TP53 - confirm.
        p_value = result['inactivating p-value'].iloc[0]
        # report the p-value itself; the old `result[0][2]` lookup would
        # fail with a KeyError while building the failure message
        assert p_value < 0.001, 'TP53 should have a very low p-value ({0}>.001)'.format(p_value)
Exemple #5
0
def test_100genes_main():
    """Exercise the 'tsg' analysis on the 100-gene data set.

    Three runs of ``pt.main`` are made (context 1, 0 and 2), each writing to
    its own output file. After each run the count of genes with an
    'inactivating BH q-value' below 0.1 is compared against an upper bound.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/100genes.fa'),
        'bed': os.path.join(file_dir, 'data/100genes.bed'),
        'mutations': os.path.join(file_dir, 'data/100genes_mutations.txt'),
        'output': os.path.join(
            file_dir, 'output/100genes_deleterious_single_nuc_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg',
    }

    def count_significant(context, output_name):
        # run one scenario and count genes passing the q-value cut-off
        opts['context'] = context
        opts['output'] = os.path.join(file_dir, output_name)
        table = pt.main(opts)
        return np.sum(table['inactivating BH q-value'] < .1)

    failure_msg = 'Few of the 100 test genes should not be significant ({0})'

    # single nucleotide context
    num_del_sig = count_significant(
        1, 'output/100genes_deleterious_single_nuc_output.txt')
    assert num_del_sig < 7, failure_msg.format(num_del_sig)

    # no context case
    num_del_sig = count_significant(
        0, 'output/100genes_deleterious_no_context_output.txt')
    assert num_del_sig < 9, failure_msg.format(num_del_sig)

    # di-nucleotide context
    num_del_sig = count_significant(
        2, 'output/100genes_deleterious_dinuc_output.txt')
    assert num_del_sig < 7, failure_msg.format(num_del_sig)
Exemple #6
0
def test_100genes_main():
    """Run the 'hotmaps1d' analysis on the 100-gene data set.

    A single call to ``rt.main`` is made with context 1 and window 3; fewer
    than nine entries of the returned 'q-value' column may fall below 0.01.
    """
    def in_test_dir(rel_path):
        # resolve a path relative to the directory holding the test data
        return os.path.join(file_dir, rel_path)

    opts = {
        'input': in_test_dir('data/100genes.fa'),
        'bed': in_test_dir('data/100genes.bed'),
        'mutations': in_test_dir('data/100genes_mutations.txt'),
        'output': in_test_dir('output/100genes_hotmaps_single_nuc_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'unique': False,
        'seed': None,
        'window': 3,
        'kind': 'hotmaps1d',
    }

    # single nucleotide context
    result = rt.main(opts)
    is_significant = result['q-value'] < .01
    num_sig = np.sum(is_significant)
    assert num_sig < 9, 'Few of the 100 test genes should not be significant ({0})'.format(num_sig)
Exemple #7
0
def main(opts, mutation_df=None, frameshift_df=None):
    """Run the randomization-based test and post-process its result table.

    Parameters
    ----------
    opts : dict
        Run options. Keys read here are 'output', 'kind' and, for the
        'oncogene' kind, 'num_iterations'. The same dict is handed to
        ``rt.main``.
    mutation_df : optional
        Forwarded unchanged to ``rt.main``.
    frameshift_df : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    result_df
        The post-processed table (presumably a pandas DataFrame) indexed by
        'gene', with the 'gene' column kept as well.

    Raises
    ------
    ValueError
        If ``opts['kind']`` is not one of the supported kinds.
    """
    # p-value / q-value column names produced for each supported kind
    pvalue_columns = {
        'tsg': ('inactivating p-value',
                'inactivating BH q-value'),
        'effect': ('entropy-on-effect p-value',
                   'entropy-on-effect BH q-value'),
        'oncogene': ('entropy p-value',
                     'entropy BH q-value'),
        'protein': ('normalized graph-smoothed position entropy p-value',
                    'normalized graph-smoothed position entropy BH q-value'),
    }
    kind = opts['kind']
    if kind not in pvalue_columns:
        # fail before the expensive simulation rather than with an
        # UnboundLocalError afterwards
        raise ValueError(
            "Unsupported kind {0!r}; expected one of {1}".format(
                kind, sorted(pvalue_columns)))
    p_val_col, q_val_col = pvalue_columns[kind]

    # get output file; rt.main must not write it because the table is
    # post-processed below before being saved
    myoutput_path = opts['output']
    opts['output'] = ''
    try:
        # perform randomization-based test
        result_df = rt.main(opts, mutation_df)
    finally:
        # give the caller's options back untouched so the dict can be reused
        opts['output'] = myoutput_path

    # clean up p-values for combined p-value calculation
    result_df[p_val_col] = result_df[p_val_col].fillna(1)
    result_df[q_val_col] = result_df[q_val_col].fillna(1)

    if kind == 'tsg':
        # drop genes that never occur
        no_ssvs = (result_df['Total SNV Mutations'] == 0)
        result_df = result_df[~no_ssvs]

        result_df = result_df.sort_values(by=p_val_col)
    elif kind == 'oncogene':
        # work on an explicit copy so the column assignments below do not
        # target a slice of the original table
        result_df = result_df[result_df['Total Mutations'] > 0].copy()

        # get FDR
        result_df['entropy BH q-value'] = mypval.bh_fdr(
            result_df['entropy p-value'])

        # combine p-values; zeros are replaced by 1/num_iterations first
        min_pval = 1. / opts['num_iterations']
        result_df['tmp entropy p-value'] = result_df['entropy p-value']
        result_df['tmp vest p-value'] = result_df['vest p-value']
        result_df.loc[result_df['entropy p-value'] == 0,
                      'tmp entropy p-value'] = min_pval
        result_df.loc[result_df['vest p-value'] == 0,
                      'tmp vest p-value'] = min_pval
        result_df['combined p-value'] = result_df[[
            'tmp entropy p-value', 'tmp vest p-value'
        ]].apply(mypval.fishers_method, axis=1)
        result_df['combined BH q-value'] = mypval.bh_fdr(
            result_df['combined p-value'])
        del result_df['tmp vest p-value']
        del result_df['tmp entropy p-value']

    if myoutput_path:
        # write output if specified
        result_df.to_csv(myoutput_path, sep='\t', index=False)

    result_df = result_df.set_index('gene', drop=False)

    return result_df
def main(opts,
         mutation_df=None,
         frameshift_df=None):
    """Run the randomization-based test and post-process its result table.

    Parameters
    ----------
    opts : dict
        Run options. Keys read here are 'output', 'kind' and, for the
        'oncogene' kind, 'num_iterations'. The same dict is handed to
        ``rt.main``.
    mutation_df : optional
        Forwarded unchanged to ``rt.main``.
    frameshift_df : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    result_df
        The post-processed table (presumably a pandas DataFrame) indexed by
        'gene', with the 'gene' column kept as well.

    Raises
    ------
    ValueError
        If ``opts['kind']`` is not one of the supported kinds.
    """
    # p-value / q-value column names produced for each supported kind
    pvalue_columns = {
        'tsg': ('inactivating p-value',
                'inactivating BH q-value'),
        'effect': ('entropy-on-effect p-value',
                   'entropy-on-effect BH q-value'),
        'oncogene': ('entropy p-value',
                     'entropy BH q-value'),
        'protein': ('normalized graph-smoothed position entropy p-value',
                    'normalized graph-smoothed position entropy BH q-value'),
        'hotmaps1d': ('p-value',
                      'q-value'),
    }
    kind = opts['kind']
    if kind not in pvalue_columns:
        # fail before the expensive simulation rather than with an
        # UnboundLocalError afterwards
        raise ValueError(
            "Unsupported kind {0!r}; expected one of {1}".format(
                kind, sorted(pvalue_columns)))
    p_val_col, q_val_col = pvalue_columns[kind]

    # get output file; rt.main must not write it because the table is
    # post-processed below before being saved
    myoutput_path = opts['output']
    opts['output'] = ''
    try:
        # perform randomization-based test
        result_df = rt.main(opts, mutation_df)
    finally:
        # give the caller's options back untouched so the dict can be reused
        opts['output'] = myoutput_path

    # clean up p-values for combined p-value calculation
    result_df[p_val_col] = result_df[p_val_col].fillna(1)
    result_df[q_val_col] = result_df[q_val_col].fillna(1)

    if kind == 'tsg':
        # drop genes that never occur
        no_ssvs = (result_df['Total SNV Mutations'] == 0)
        result_df = result_df[~no_ssvs]

        result_df = result_df.sort_values(by=p_val_col)
    elif kind == 'oncogene':
        # work on an explicit copy so the column assignments below do not
        # target a slice of the original table
        result_df = result_df[result_df['Total Mutations'] > 0].copy()

        # get FDR
        result_df['entropy BH q-value'] = mypval.bh_fdr(result_df['entropy p-value'])

        # combine p-values; zeros are replaced by 1/num_iterations first
        min_pval = 1. / opts['num_iterations']
        result_df['tmp entropy p-value'] = result_df['entropy p-value']
        result_df['tmp vest p-value'] = result_df['vest p-value']
        result_df.loc[result_df['entropy p-value'] == 0, 'tmp entropy p-value'] = min_pval
        result_df.loc[result_df['vest p-value'] == 0, 'tmp vest p-value'] = min_pval
        tmp_cols = ['tmp entropy p-value', 'tmp vest p-value']
        result_df['combined p-value'] = result_df[tmp_cols].apply(mypval.fishers_method, axis=1)
        result_df['combined BH q-value'] = mypval.bh_fdr(result_df['combined p-value'])
        del result_df['tmp vest p-value']
        del result_df['tmp entropy p-value']

    if myoutput_path:
        # write output if specified
        result_df.to_csv(myoutput_path, sep='\t', index=False)

    result_df = result_df.set_index('gene', drop=False)

    return result_df
def test_ctnnb1_main():
    """Verify CTNNB1 is highly significant under the 'oncogene' analysis.

    Runs ``pt.main`` for the single-nucleotide (1), di-nucleotide (2) and
    no-context (0) settings and requires the first row's 'entropy p-value'
    to be below 0.001 in every run.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/CTNNB1.fa'),
        'bed': os.path.join(file_dir, 'data/CTNNB1.bed'),
        'mutations': os.path.join(file_dir, 'data/CTNNB1_mutations.txt'),
        'output': os.path.join(file_dir, 'output/CTNNB1_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'score_dir': os.path.join(file_dir, 'data/scores'),
        'processes': 0,
        'num_iterations': 10000,
        'stop_criteria': 100,
        'recurrent_pseudo_count': 0,
        'unique': 0,
        'seed': None,
        'kind': 'oncogene'
    }
    failure_msg = 'CTNNB1 should have a very low p-value ({0}>.001)'

    def first_entropy_pvalue(context):
        # Run one context setting and read the first row by position;
        # `.ix` is no longer available in pandas.
        # NOTE(review): assumes the first row is CTNNB1 - confirm.
        opts['context'] = context
        result = pt.main(opts)
        return result['entropy p-value'].iloc[0]

    # single nucleotide context
    p_value = first_entropy_pvalue(1)
    # format the p-value itself: the former `result[0][2]` lookup would
    # raise KeyError while building the failure message
    assert p_value < 0.001, failure_msg.format(p_value)

    # di-nucleotide case
    p_value = first_entropy_pvalue(2)
    assert p_value < 0.001, failure_msg.format(p_value)

    # no context case
    p_value = first_entropy_pvalue(0)
    assert p_value < 0.001, failure_msg.format(p_value)
Exemple #10
0
def test_tp53_main():
    """Verify TP53 is highly significant under the 'tsg' analysis.

    Runs ``pt.main`` for the single-nucleotide (1), di-nucleotide (2) and
    no-context (0) settings and requires the first row's
    'inactivating p-value' to be below 0.001 in every run.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/tp53.fa'),
        'bed': os.path.join(file_dir, 'data/tp53.bed'),
        'mutations': os.path.join(file_dir, 'data/tp53_mutations.txt'),
        'output': os.path.join(file_dir, 'output/tp53_output.txt'),
        'context': 1,
        'use_unmapped': False,
        'deleterious': 5,
        'processes': 0,
        'num_iterations': 10000,
        'stop_criteria': 100,
        'deleterious_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'tsg'
    }
    failure_msg = 'TP53 should have a very low p-value ({0}>.001)'

    def first_inactivating_pvalue(context):
        # Run one context setting and read the first row by position;
        # `.ix` is no longer available in pandas.
        # NOTE(review): assumes the first row is TP53 - confirm.
        opts['context'] = context
        result = pt.main(opts)
        return result['inactivating p-value'].iloc[0]

    # single nucleotide context
    p_value = first_inactivating_pvalue(1)
    # format the p-value itself: the former `result[0][2]` lookup would
    # raise KeyError while building the failure message
    assert p_value < 0.001, failure_msg.format(p_value)

    # di-nucleotide case
    p_value = first_inactivating_pvalue(2)
    assert p_value < 0.001, failure_msg.format(p_value)

    # no context case
    p_value = first_inactivating_pvalue(0)
    assert p_value < 0.001, failure_msg.format(p_value)
def test_100genes_main():
    """Run the 'oncogene' analysis on the 100-gene data set for three contexts.

    For each context setting ``pt.main`` is executed with its own output
    path, and fewer than nine genes may have an 'entropy BH q-value' below
    0.1.
    """
    def in_test_dir(rel_path):
        # resolve a path relative to the directory holding the test data
        return os.path.join(file_dir, rel_path)

    opts = {
        'input': in_test_dir('data/100genes.fa'),
        'bed': in_test_dir('data/100genes.bed'),
        'mutations': in_test_dir('data/100genes_mutations.txt'),
        'output': in_test_dir('output/100genes_position_single_nuc_output.txt'),
        'context': 1,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'score_dir': in_test_dir('data/scores'),
        'recurrent_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'oncogene',
    }

    # (context, output file) in the order the scenarios are run
    scenarios = [
        (1, 'output/100genes_position_single_nuc_output.txt'),  # single nucleotide
        (0, 'output/100genes_position_no_context_output.txt'),  # no context
        (2, 'output/100genes_position_dinuc_output.txt'),       # di-nucleotide
    ]
    for context, output_name in scenarios:
        opts['context'] = context
        opts['output'] = in_test_dir(output_name)
        result = pt.main(opts)
        num_ent_sig = np.sum(result['entropy BH q-value'] < .1)
        assert num_ent_sig < 9, 'Few of the 100 test genes should not be significant ({0})'.format(num_ent_sig)
def test_100genes_main():
    """Exercise the 'oncogene' analysis on the 100-gene data set.

    Three runs of ``pt.main`` are made (context 1, 0 and 2), each writing to
    its own output file. After each run the count of genes with an
    'entropy BH q-value' below 0.1 has to be less than nine.
    """
    opts = {
        'input': os.path.join(file_dir, 'data/100genes.fa'),
        'bed': os.path.join(file_dir, 'data/100genes.bed'),
        'mutations': os.path.join(file_dir, 'data/100genes_mutations.txt'),
        'output': os.path.join(
            file_dir, 'output/100genes_position_single_nuc_output.txt'),
        'context': 1,
        'tsg_score': .1,
        'recurrent': 3,
        'fraction': .02,
        'use_unmapped': False,
        'processes': 0,
        'num_iterations': 1000,
        'stop_criteria': 100,
        'score_dir': os.path.join(file_dir, 'data/scores'),
        'recurrent_pseudo_count': 0,
        'unique': False,
        'seed': None,
        'kind': 'oncogene',
    }

    def count_significant(context, output_name):
        # run one scenario and count genes passing the q-value cut-off
        opts['context'] = context
        opts['output'] = os.path.join(file_dir, output_name)
        table = pt.main(opts)
        return np.sum(table['entropy BH q-value'] < .1)

    failure_msg = 'Few of the 100 test genes should not be significant ({0})'

    # single nucleotide context
    num_ent_sig = count_significant(
        1, 'output/100genes_position_single_nuc_output.txt')
    assert num_ent_sig < 9, failure_msg.format(num_ent_sig)

    # no context case
    num_ent_sig = count_significant(
        0, 'output/100genes_position_no_context_output.txt')
    assert num_ent_sig < 9, failure_msg.format(num_ent_sig)

    # di-nucleotide context
    num_ent_sig = count_significant(
        2, 'output/100genes_position_dinuc_output.txt')
    assert num_ent_sig < 9, failure_msg.format(num_ent_sig)