def main():
    """Compare predicted-binding fractions between two anchor result files.

    Expects two positional command-line arguments: the "interesting" anchor
    results and the null anchor results.  Each file is passed to
    ``count_anchor_results``, which returns ``(binding_count, total_count)``.
    The counts are arranged into a 2x2 table (rows: binding / non-binding,
    columns: first file / null file), tested with ``fisher_exact``, printed,
    and shown as a bar plot of the two binding fractions.

    Options:
        -1, --exon_label1: bar label for the first results file.
        -2, --exon_label2: bar label for the null results file.
        -t, --title: title of the plot.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when the number of positional arguments is not two, or when a results
    file yields a total count of zero (the fraction would be undefined).
    """
    usage = 'usage: %prog anchor_results.txt anchor_results_null.txt\n'\
        'Requires two input arguments:\n'\
        '1) Interesting anchor results, output from run_anchor_batch.py\n'\
        '2) Null anchor results, output from run_anchor_batch.py\n'
    parser = OptionParser(usage=usage)
    parser.add_option('-1',
                      '--exon_label1',
                      dest='exon_label1',
                      default='Exon label 1',
                      help='Exon label of anchor_results.txt.')
    parser.add_option('-2',
                      '--exon_label2',
                      dest='exon_label2',
                      default='Exon label 2',
                      help='Exon label of anchor_results_null.txt')
    parser.add_option(
        '-t',
        '--title',
        dest='title',
        default='Fraction of exons with predicted binding regions',
        help='Title of plot.')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        # parser.error() expands %prog in the usage text, writes to stderr
        # and exits with a non-zero status; printing the raw usage string
        # and calling sys.exit() reported success and a literal "%prog".
        parser.error('Two arguments need to be specified in command line.')
    anchor_results_path, anchor_results_null_path = args
    mylabels = [options.exon_label1, options.exon_label2]
    mytitle = options.title

    # Index 0 of every list belongs to the first file, index 1 to the null.
    anchor_dic = {'binding': [], 'non_binding': [], 'total': []}
    for results in (anchor_results_path, anchor_results_null_path):
        binding_count, total_count = count_anchor_results(results)
        if total_count == 0:
            # Without this guard the fraction below raises ZeroDivisionError.
            parser.error('No anchor results counted in %s' % results)
        anchor_dic['binding'].append(binding_count)
        anchor_dic['non_binding'].append(total_count - binding_count)
        anchor_dic['total'].append(total_count)

    oddsratio, pvalue = \
        fisher_exact([anchor_dic['binding'], anchor_dic['non_binding']])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('oddsratio: %s\npvalue: %s' % (oddsratio, pvalue))

    # plot distributions (from plot_meme_motif_null_comparison.py)
    # float() keeps true division under Python 2 integer semantics.
    frac_binding = float(anchor_dic['binding'][0]) / anchor_dic['total'][0]
    frac_binding_null = \
        float(anchor_dic['binding'][1]) / anchor_dic['total'][1]
    myvals = [frac_binding, frac_binding_null]
    plot_barplot(myvals, mytitle, mylabels,
                 ylabel='Fraction predicted binding regions',
                 mytext1='%i/%i' % (anchor_dic['binding'][0],
                                    anchor_dic['total'][0]),
                 mytext2='%i/%i' % (anchor_dic['binding'][1],
                                    anchor_dic['total'][1]),
                 mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
                 ymin=0,
                 ymax=1,
                 width=0.5)
    plt.show()
def main():
    """Test and plot binding-region enrichment for two anchor result files.

    Command line: ``anchor_results.txt anchor_results_null.txt`` plus the
    optional ``-1/--exon_label1``, ``-2/--exon_label2`` (bar labels) and
    ``-t/--title`` (plot title).

    For each file ``count_anchor_results`` supplies a
    ``(binding_count, total_count)`` pair.  The binding and non-binding
    counts of both files form the 2x2 table handed to ``fisher_exact``; the
    odds ratio and p-value are printed and the binding fractions are drawn
    with ``plot_barplot`` before ``plt.show()`` is called.

    A wrong number of positional arguments, or a file whose total count is
    zero, ends the program via ``parser.error`` (message on stderr, exit
    status 2).
    """
    usage = 'usage: %prog anchor_results.txt anchor_results_null.txt\n'\
        'Requires two input arguments:\n'\
        '1) Interesting anchor results, output from run_anchor_batch.py\n'\
        '2) Null anchor results, output from run_anchor_batch.py\n'
    parser = OptionParser(usage=usage)
    parser.add_option('-1', '--exon_label1', dest='exon_label1',
                      default='Exon label 1',
                      help='Exon label of anchor_results.txt.')
    parser.add_option('-2', '--exon_label2', dest='exon_label2',
                      default='Exon label 2',
                      help='Exon label of anchor_results_null.txt')
    parser.add_option('-t', '--title', dest='title',
                      default='Fraction of exons with predicted binding regions',
                      help='Title of plot.')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        # Report misuse through optparse: %prog gets substituted in the usage
        # text and the process exits non-zero (the old print + sys.exit()
        # path exited with status 0 and printed "%prog" verbatim).
        parser.error('Two arguments need to be specified in command line.')
    anchor_results_path = args[0]
    anchor_results_null_path = args[1]

    # Per-category counts; position 0 = interesting file, position 1 = null.
    anchor_dic = {'binding': [], 'non_binding': [], 'total': []}
    for results in [anchor_results_path, anchor_results_null_path]:
        binding_count, total_count = count_anchor_results(results)
        if total_count == 0:
            # The fraction computed further down would divide by zero.
            parser.error('No anchor results counted in %s' % results)
        counts = {'binding': binding_count,
                  'non_binding': total_count - binding_count,
                  'total': total_count}
        for key in counts:
            anchor_dic[key].append(counts[key])

    oddsratio, pvalue = \
        fisher_exact([anchor_dic['binding'], anchor_dic['non_binding']])

    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    print('oddsratio: %s\npvalue: %s' % (oddsratio, pvalue))

    # plot distributions (from plot_meme_motif_null_comparison.py)
    mylabels = [options.exon_label1, options.exon_label2]
    # float() forces true division when running under Python 2.
    myvals = [float(anchor_dic['binding'][i]) / anchor_dic['total'][i]
              for i in (0, 1)]
    plot_barplot(myvals, options.title, mylabels,
                 ylabel='Fraction predicted binding regions',
                 mytext1='%i/%i' % (anchor_dic['binding'][0],
                                    anchor_dic['total'][0]),
                 mytext2='%i/%i' % (anchor_dic['binding'][1],
                                    anchor_dic['total'][1]),
                 mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
                 ymin=0,
                 ymax=1,
                 width=0.5)
    plt.show()
def main():
    """Compare GERP conservation of MEME motifs against control regions.

    Expects two positional command-line arguments, both pkl paths: the
    non-null summary and the null-mode summary.  Each is loaded with
    ``get_dic_from_pklpath`` and reduced to a collection of scores with
    ``get_gerp_scores(..., gerpkey='avg_rs_score')``.  The function then

    1. draws a density plot of both score collections,
    2. counts conserved entries in each via
       ``gerp_utilities.conserved_regions`` at the chosen threshold,
    3. runs ``fisher_exact`` on the conserved / not-conserved 2x2 table and
       prints the result,
    4. draws a bar plot of the conserved fractions and calls ``plt.show()``.

    Options:
        -t, --threshold: float score threshold for "conserved" (default 2.0).
        -y, --ymax: float upper y-limit for the density plot (default 0.03).

    Fewer than two positional arguments, or an input that yields no scores,
    ends the program via ``parser.error`` (message on stderr, exit status 2).
    """
    # NOTE(review): the first usage line names "meme_gerp_genename_filepath
    # output_filepath", but the code below reads two pkl paths - confirm the
    # intended wording before changing this user-facing text.
    usage = 'usage: %prog meme_gerp_genename_filepath output_filepath\n'\
        'Requires two input arguments:\n'\
        '1) pkl file from summarize_meme_results: non-null\n'\
        '2) pkl file from summarize_meme_results: null-mode\n'
    parser = OptionParser(usage=usage)
    # type='float' lets optparse reject a non-numeric threshold with a clean
    # usage error instead of a ValueError traceback from float().
    parser.add_option('-t', '--threshold', dest='score_threshold',
                      type='float',
                      default=2.0,
                      help='Float, threshold for what one considers conserved.')
    parser.add_option('-y', '--ymax', dest='ymax',
                      type='float',
                      default=0.03,
                      help='Y max for density plot')
    (options, args) = parser.parse_args()
    if len(args) < 2:
        # parser.error() substitutes %prog in the usage text, writes to
        # stderr and exits non-zero; the previous print + sys.exit() path
        # exited with status 0.
        parser.error('Two arguments need to be specified in command line.')
    non_null_pklpath = args[0]
    null_pklpath = args[1]
    score_threshold = options.score_threshold

    # get dics from pkl
    # NOTE(review): presumably this unpickles the files - if so, only pass
    # pkl files from a trusted source.
    non_null_dic = get_dic_from_pklpath(non_null_pklpath)
    null_dic = get_dic_from_pklpath(null_pklpath)

    non_null_gerp_scores = get_gerp_scores(non_null_dic, gerpkey='avg_rs_score')
    null_gerp_scores = get_gerp_scores(null_dic, gerpkey='avg_rs_score')

    # An empty score collection would cause a ZeroDivisionError when the
    # conserved fraction is computed; fail early with a readable message.
    for pklpath, scores in ((non_null_pklpath, non_null_gerp_scores),
                            (null_pklpath, null_gerp_scores)):
        if len(scores) == 0:
            parser.error('No GERP scores found in %s' % pklpath)

    plot_functions.plot_density([non_null_gerp_scores, null_gerp_scores],
                                mytitle='Density plot of conservation scores',
                                labels_lists=['MEME motifs', 'Controls'],
                                xlabel='GERP conservation score',
                                ylabel='Density',
                                xmin=-4, xmax=4,
                                ymax=options.ymax,
                                smoothness=0.15,
                                drawvline=score_threshold)

    # find how many conserved regions are in each.
    n_conserved_in_meme = gerp_utilities.conserved_regions(
        non_null_gerp_scores, fraction=False, threshold=score_threshold)
    n_conserved_in_null = gerp_utilities.conserved_regions(
        null_gerp_scores, fraction=False, threshold=score_threshold)
    n_total_in_meme = len(non_null_gerp_scores)
    n_total_in_null = len(null_gerp_scores)
    n_not_conserved_in_meme = n_total_in_meme - n_conserved_in_meme
    n_not_conserved_in_null = n_total_in_null - n_conserved_in_null

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Threshold: %s' % score_threshold)
    print('Number of conserved elements: %s' % n_conserved_in_meme)
    print('Number of conserved elements found in control: %s'
          % n_conserved_in_null)

    # Perform fisher's exact test
    # Rows: conserved / not conserved; columns: MEME motifs / control.
    oddsratio, pvalue = fisher_exact(
        [[n_conserved_in_meme, n_conserved_in_null],
         [n_not_conserved_in_meme, n_not_conserved_in_null]])
    print('Fishers Exact Test, Oddsratio: %s. Pvalue: %s' % (oddsratio, pvalue))

    # plot distributions
    mylabels = ['Meme motifs', 'Control region']
    mytitle = 'Fraction of elements conserved compared to control region'
    # float() keeps true division under Python 2 integer semantics.
    frac_conserved_meme = float(n_conserved_in_meme) / n_total_in_meme
    frac_conserved_null = float(n_conserved_in_null) / n_total_in_null
    myvals = [frac_conserved_meme, frac_conserved_null]
    plot_functions.plot_barplot(
        myvals, mytitle, mylabels,
        ylabel='Fraction of elements conserved',
        mytext1='%i/%i' % (n_conserved_in_meme, n_total_in_meme),
        mytext2='%i/%i' % (n_conserved_in_null, n_total_in_null),
        mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
        ymin=0,
        ymax=1,
        width=0.5)
    plt.show()
# Example #4
# 0
def main():
    """Test whether MEME motifs are more conserved than control regions.

    Command line: two pkl paths (non-null summary, then null-mode summary)
    plus the optional ``-t/--threshold`` (float, default 2.0) and
    ``-y/--ymax`` (float, default 0.03, upper y-limit of the density plot).

    Both files are loaded with ``get_dic_from_pklpath`` and turned into score
    collections with ``get_gerp_scores(..., gerpkey='avg_rs_score')``.  The
    scores are drawn as a density plot, the number of conserved entries in
    each collection is obtained from ``gerp_utilities.conserved_regions``,
    the conserved / not-conserved counts are tested with ``fisher_exact``,
    and the conserved fractions are shown as a bar plot.

    Fewer than two positional arguments, or an input that yields no scores,
    ends the program via ``parser.error`` (message on stderr, exit status 2).
    """
    # NOTE(review): the first usage line mentions "meme_gerp_genename_filepath
    # output_filepath" while the code consumes two pkl paths - confirm the
    # intended wording before editing this user-facing text.
    usage = 'usage: %prog meme_gerp_genename_filepath output_filepath\n'\
        'Requires two input arguments:\n'\
        '1) pkl file from summarize_meme_results: non-null\n'\
        '2) pkl file from summarize_meme_results: null-mode\n'
    parser = OptionParser(usage=usage)
    # Declaring the type makes optparse validate the value and report a bad
    # threshold as a usage error rather than a ValueError traceback.
    parser.add_option(
        '-t',
        '--threshold',
        dest='score_threshold',
        type='float',
        default=2.0,
        help='Float, threshold for what one considers conserved.')
    parser.add_option('-y',
                      '--ymax',
                      dest='ymax',
                      type='float',
                      default=0.03,
                      help='Y max for density plot')
    (options, args) = parser.parse_args()
    if len(args) < 2:
        # Route misuse through optparse so %prog is expanded in the usage
        # text and the exit status is non-zero (previously status 0).
        parser.error('Two arguments need to be specified in command line.')
    non_null_pklpath, null_pklpath = args[0], args[1]
    score_threshold = options.score_threshold

    # get dics from pkl
    # NOTE(review): presumably this unpickles the files - if so, only pass
    # pkl files from a trusted source.
    non_null_dic = get_dic_from_pklpath(non_null_pklpath)
    null_dic = get_dic_from_pklpath(null_pklpath)

    non_null_gerp_scores = get_gerp_scores(non_null_dic,
                                           gerpkey='avg_rs_score')
    null_gerp_scores = get_gerp_scores(null_dic, gerpkey='avg_rs_score')

    # Guard against empty inputs: the conserved fractions below divide by
    # these lengths and would otherwise raise ZeroDivisionError.
    n_total_in_meme = len(non_null_gerp_scores)
    n_total_in_null = len(null_gerp_scores)
    if n_total_in_meme == 0:
        parser.error('No GERP scores found in %s' % non_null_pklpath)
    if n_total_in_null == 0:
        parser.error('No GERP scores found in %s' % null_pklpath)

    plot_functions.plot_density([non_null_gerp_scores, null_gerp_scores],
                                mytitle='Density plot of conservation scores',
                                labels_lists=['MEME motifs', 'Controls'],
                                xlabel='GERP conservation score',
                                ylabel='Density',
                                xmin=-4,
                                xmax=4,
                                ymax=options.ymax,
                                smoothness=0.15,
                                drawvline=score_threshold)

    # find how many conserved regions are in each.
    n_conserved_in_meme = gerp_utilities.conserved_regions(
        non_null_gerp_scores, fraction=False, threshold=score_threshold)
    n_conserved_in_null = gerp_utilities.conserved_regions(
        null_gerp_scores, fraction=False, threshold=score_threshold)
    n_not_conserved_in_meme = n_total_in_meme - n_conserved_in_meme
    n_not_conserved_in_null = n_total_in_null - n_conserved_in_null

    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    print('Threshold: %s' % score_threshold)
    print('Number of conserved elements: %s' % n_conserved_in_meme)
    print('Number of conserved elements found in control: %s'
          % n_conserved_in_null)

    # Perform fisher's exact test
    # Rows: conserved / not conserved; columns: MEME motifs / control.
    oddsratio, pvalue = fisher_exact(
        [[n_conserved_in_meme, n_conserved_in_null],
         [n_not_conserved_in_meme, n_not_conserved_in_null]])
    print('Fishers Exact Test, Oddsratio: %s. Pvalue: %s' % (oddsratio, pvalue))

    # plot distributions
    mylabels = ['Meme motifs', 'Control region']
    mytitle = 'Fraction of elements conserved compared to control region'
    # float() forces true division when running under Python 2.
    myvals = [float(n_conserved_in_meme) / n_total_in_meme,
              float(n_conserved_in_null) / n_total_in_null]
    plot_functions.plot_barplot(
        myvals, mytitle, mylabels,
        ylabel='Fraction of elements conserved',
        mytext1='%i/%i' % (n_conserved_in_meme, n_total_in_meme),
        mytext2='%i/%i' % (n_conserved_in_null, n_total_in_null),
        mytext3="*Fisher's Exact Test\nP-value=%.2e" % pvalue,
        ymin=0,
        ymax=1,
        width=0.5)
    plt.show()