Beispiel #1
0
def main():
    """Compare two mutation profiles and write the result as a PDF heatmap.

    Command line: ``[options] <mut1 file> <mut2 file>``.  Both files are read
    with ``parse_mutations``, combined with ``compute_relative_profile``,
    printed with ``print_table`` and then drawn as a tile plot that is saved
    to the path given by ``-o``.

    NOTE(review): the ``-m`` flag is accepted but never read in this
    function -- confirm whether it is still needed.
    """
    usage = 'usage: %prog [options] <mut1 file> <mut2 file>'
    parser = OptionParser(usage)
    parser.add_option(
        '-m', dest='mut_norm', action='store_true', default=False,
        help='Normalize by # mutations (as opposed to sequenced bp) [Default: %default]')
    parser.add_option(
        '-o', dest='output_pdf', default='mut_cmp.pdf',
        help='Output pdf file for heatmap [Default: %default]')
    parser.add_option(
        '-r', dest='raw', action='store_true', default=False,
        help='Use raw mutation counts (as opposed to normalized for ACGT content) [Default: %default]')
    (options, args) = parser.parse_args()

    # parser.error() reports the problem and exits, so no else branch is needed.
    if len(args) != 2:
        parser.error(usage)
    mut1_file, mut2_file = args

    mutation_profile1, seq_bp1 = parse_mutations(mut1_file, options.raw)
    mutation_profile2, seq_bp2 = parse_mutations(mut2_file, options.raw)

    relative_mutation_profile = compute_relative_profile(
        mutation_profile1, seq_bp1, mutation_profile2, seq_bp2)

    print_table(relative_mutation_profile)

    # One data-frame row per ordered symbol pair; the profile is keyed by
    # (nt1, nt2) tuples.
    nts = ['_', 'A', 'C', 'G', 'T']
    pairs = [(nt1, nt2) for nt1 in nts for nt2 in nts]
    rel = [relative_mutation_profile[pair] for pair in pairs]

    df = ro.DataFrame({
        'nt1': ro.StrVector([nt1 for nt1, _ in pairs]),
        'nt2': ro.StrVector([nt2 for _, nt2 in pairs]),
        'rel': ro.FloatVector(rel),
    })

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='nt2', y='nt1', fill='rel') + \
        ggplot2.geom_tile() + \
        ggplot2.scale_x_discrete('Read') + \
        ggplot2.scale_y_discrete('Reference') + \
        ggplot2.scale_fill_gradient2('log2 enrichment', low='darkblue', mid='white', high='darkred')

    # save to file; always close the device so a failed plot does not leave
    # the PDF graphics device open
    grdevices.pdf(file=options.output_pdf)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
Beispiel #2
0
def gray_plot(data, min=0, max=1, name=""):
    """Compose a black-to-white tile plot of ``data`` and return it undrawn.

    ``data`` is melted with the R ``reshape`` package; the ``L1`` and ``L2``
    columns are mapped to x and y, and ``value`` to the tile fill.

    Args:
        data: object handed to ``reshape.melt``.
        min: lower limit of the fill scale (the ``low="black"`` end).
        max: upper limit of the fill scale (the ``high="white"`` end).
        name: label passed to the continuous x scale.

    Returns:
        The composed ggplot object.
    """
    reshape = importr('reshape')
    melted = reshape.melt(data, id_var=['x', 'y'])
    fill_limits = FloatVector((min, max))
    layers = (
        ggplot2.aes_string(x='L1', y='L2'),
        ggplot2.geom_tile(ggplot2.aes_string(fill='value')),
        ggplot2.scale_fill_gradient(low="black", high="white", limits=fill_limits),
        ggplot2.coord_equal(),
        ggplot2.scale_x_continuous(name),
    )
    pg = ggplot2.ggplot(melted)
    for layer in layers:
        pg = pg + layer
    return pg
def gray_plot(data, min=0, max=1, name=""):
    """Return a grayscale tile plot built from the melted form of ``data``.

    The melted ``L1``/``L2`` columns are used as x/y and ``value`` as the
    fill, on a black (``min``) to white (``max``) gradient with equal axis
    scaling.  ``name`` is passed to the continuous x scale.  Nothing is
    rendered here; the plot object is handed back to the caller.
    """
    reshape = importr('reshape')
    long_form = reshape.melt(data, id_var=['x', 'y'])

    pg = ggplot2.ggplot(long_form)
    pg = pg + ggplot2.aes_string(x='L1', y='L2')
    pg = pg + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    pg = pg + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=FloatVector((min, max)))
    pg = pg + ggplot2.coord_equal()
    pg = pg + ggplot2.scale_x_continuous(name)
    return pg
 def plot_similarity_matrix(self, item_type, image_file, title):
     '''Draw the pairwise crawl similarities of one item type as a heat map.

     Each (crawl1, crawl2) entry of ``self.similarity[item_type]`` becomes
     one tile, coloured by similarity and labelled with a rounded value.
     The median similarity is the midpoint of the colour scale.  When there
     are more crawls than ``self.MAX_MATRIX_SIZE``, crawls beyond that
     position in descending sort order are removed from both axes.

     The plot is saved as ``image_file`` inside ``PLOTDIR`` and returned.
     '''
     per_type = self.similarity[item_type]

     # column name -> {row id -> value}; row ids start at 1
     data = defaultdict(dict)
     cells = ((crawl1, crawl2, per_type[crawl1][crawl2])
              for crawl1 in per_type
              for crawl2 in per_type[crawl1])
     for row_id, (crawl1, crawl2, similarity) in enumerate(cells, start=1):
         data['crawl1'][row_id] = MonthlyCrawl.short_name(crawl1)
         data['crawl2'][row_id] = MonthlyCrawl.short_name(crawl2)
         data['similarity'][row_id] = similarity
         data['sim_rounded'][row_id] = similarity  # to be rounded
     data = pandas.DataFrame(data)
     print(data)

     # select median of similarity values as midpoint of similarity scale
     midpoint = data['similarity'].median()

     # a wide value range gets fewer decimals and a larger label font
     spread = data['similarity'].max() - data['similarity'].min()
     if spread > .2:
         decimals, textsize, minshown = 2, 2.8, .005
     else:
         decimals, textsize, minshown = 3, 2, .0005
     pattern = '{0:.' + str(decimals) + 'f}'

     def as_label(value):
         # values below the display threshold collapse to a plain '0'
         if value >= minshown:
             return pattern.format(value).lstrip('0')
         return '0'

     data['sim_rounded'] = data['sim_rounded'].apply(as_label)
     print('Median of similarities for', item_type, '=', midpoint)

     if len(per_type) > self.MAX_MATRIX_SIZE:
         descending = sorted(per_type, reverse=True)
         for position, crawl in enumerate(descending):
             short_name = MonthlyCrawl.short_name(crawl)
             if position <= self.MAX_MATRIX_SIZE:
                 continue
             data = data[data['crawl1'] != short_name]
             data = data[data['crawl2'] != short_name]

     p = ggplot2.ggplot(data)
     p = p + ggplot2.aes_string(x='crawl2', y='crawl1',
                                fill='similarity', label='sim_rounded')
     p = p + ggplot2.geom_tile(color="white")
     p = p + ggplot2.scale_fill_gradient2(low="red", high="blue", mid="white",
                                          midpoint=midpoint, space="Lab")
     p = p + GGPLOT2_THEME
     p = p + ggplot2.coord_fixed()
     p = p + ggplot2.theme(**{'axis.text.x':
                              ggplot2.element_text(angle=45,
                                                   vjust=1, hjust=1)})
     p = p + ggplot2.labs(title=title, x='', y='')
     p = p + ggplot2.geom_text(color='black', size=textsize)

     p.save(os.path.join(PLOTDIR, image_file))
     return p
def main():
    """Compare two mutation profiles and write the result as a PDF heatmap.

    Command line: ``[options] <mut1 file> <mut2 file>``.  Both files are read
    with ``parse_mutations``, combined with ``compute_relative_profile``,
    printed with ``print_table`` and then drawn as a tile plot that is saved
    to the path given by ``-o``.

    NOTE(review): the ``-m`` flag is accepted but never read in this
    function -- confirm whether it is still needed.
    """
    usage = 'usage: %prog [options] <mut1 file> <mut2 file>'
    parser = OptionParser(usage)
    parser.add_option(
        '-m',
        dest='mut_norm',
        action='store_true',
        default=False,
        help=
        'Normalize by # mutations (as opposed to sequenced bp) [Default: %default]'
    )
    parser.add_option('-o',
                      dest='output_pdf',
                      default='mut_cmp.pdf',
                      help='Output pdf file for heatmap [Default: %default]')
    parser.add_option(
        '-r',
        dest='raw',
        action='store_true',
        default=False,
        help=
        'Use raw mutation counts (as opposed to normalized for ACGT content) [Default: %default]'
    )
    (options, args) = parser.parse_args()

    # parser.error() reports the problem and exits, so no else branch is needed.
    if len(args) != 2:
        parser.error(usage)
    mut1_file, mut2_file = args

    mutation_profile1, seq_bp1 = parse_mutations(mut1_file, options.raw)
    mutation_profile2, seq_bp2 = parse_mutations(mut2_file, options.raw)

    relative_mutation_profile = compute_relative_profile(
        mutation_profile1, seq_bp1, mutation_profile2, seq_bp2)

    print_table(relative_mutation_profile)

    # One data-frame row per ordered symbol pair; the profile is keyed by
    # (nt1, nt2) tuples.
    nts = ['_', 'A', 'C', 'G', 'T']
    pairs = [(nt1, nt2) for nt1 in nts for nt2 in nts]
    rel = [relative_mutation_profile[pair] for pair in pairs]

    df = ro.DataFrame({
        'nt1': ro.StrVector([nt1 for nt1, _ in pairs]),
        'nt2': ro.StrVector([nt2 for _, nt2 in pairs]),
        'rel': ro.FloatVector(rel),
    })

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='nt2', y='nt1', fill='rel') + \
        ggplot2.geom_tile() + \
        ggplot2.scale_x_discrete('Read') + \
        ggplot2.scale_y_discrete('Reference') + \
        ggplot2.scale_fill_gradient2('log2 enrichment', low='darkblue', mid='white', high='darkred')

    # save to file; always close the device so a failed plot does not leave
    # the PDF graphics device open
    grdevices.pdf(file=options.output_pdf)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()