예제 #1
0
def get_structure_motif_fig(filter_weights,
                            filter_outs,
                            out_dir,
                            protein,
                            seq_targets,
                            sample_i=0,
                            structure=None):
    """Write per-filter motif artifacts for structure filters under ``out_dir``.

    For every filter this writes a weight heatmap, a possum motif file and a
    logo through ``structure_motifs``, builds a PWM from the logo FASTA file,
    and appends the filter to ``<out_dir>/filters_meme.txt`` when the PWM was
    built from at least 10 sites.

    Args:
        filter_weights: Filter parameters, indexed as
            ``filter_weights[f, :, :]``; ``shape[0]`` is the filter count.
        filter_outs: Filter activations, indexed as ``filter_outs[:, :, f]``.
            Its first axis is subset with ``sample_i`` when sampling.
        out_dir: Directory path used as the prefix of every output file.
        protein: Unused; kept so existing callers keep working.
        seq_targets: Unused; kept so existing callers keep working.
        sample_i: Optional collection of integer indices selecting which
            sequences (and matching ``filter_outs`` rows) to use. A falsy or
            empty value (the default ``0``) disables sampling.
        structure: The sequences handed to ``meme_intro`` and
            ``plot_filter_logo``.
    """
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('plot motif fig %s' % out_dir)
    seqs = structure

    # ``sample_i`` may be a list or an array: ``len`` sidesteps the ambiguous
    # truth value of multi-element numpy arrays, while the scalar default
    # falls back to plain truthiness.
    try:
        sampling = len(sample_i) > 0
    except TypeError:
        sampling = bool(sample_i)

    if sampling:
        print('sampling')
        # Pick from the ORIGINAL sequences (the previous code emptied the
        # list before iterating it) and in the same order in which
        # ``filter_outs[sample_i]`` picks rows, so both stay aligned.
        seqs = [seqs[ind] for ind in sample_i]
        filter_outs = filter_outs[sample_i]

    num_filters = filter_weights.shape[0]
    filter_size = 7  #filter_weights.shape[2]

    # information content per filter (0 when too few sites were found)
    filters_ic = []
    meme_out = structure_motifs.meme_intro('%s/filters_meme.txt' % out_dir,
                                           seqs)

    # Make sure the MEME file handle is released even if a plot step fails.
    try:
        for f in range(num_filters):
            print('Filter %d' % f)

            # plot filter parameters as a heatmap
            structure_motifs.plot_filter_heat(
                filter_weights[f, :, :],
                '%s/filter%d_heat.pdf' % (out_dir, f))

            # write possum motif file
            structure_motifs.filter_possum(
                filter_weights[f, :, :], 'filter%d' % f,
                '%s/filter%d_possum.txt' % (out_dir, f), False)

            # plot a logo from this filter's outputs
            structure_motifs.plot_filter_logo(
                filter_outs[:, :, f],
                filter_size,
                seqs,
                '%s/filter%d_logo' % (out_dir, f),
                maxpct_t=0.5)

            # make a PWM from the FASTA file written next to the logo
            filter_pwm, nsites = structure_motifs.make_filter_pwm(
                '%s/filter%d_logo.fa' % (out_dir, f))

            if nsites < 10:
                # no information
                filters_ic.append(0)
            else:
                # compute and save information content
                filters_ic.append(info_content(filter_pwm))

                # add to the meme motif file
                structure_motifs.meme_add(meme_out, f, filter_pwm, nsites,
                                          False)
    finally:
        meme_out.close()
예제 #2
0
def get_motif_fig_new(filter_weights, filter_outs, out_dir, seqs, sample_i=0):
    """Plot, annotate and tabulate motifs for every filter under ``out_dir``.

    Steps, in order:

    1. Per filter: write a weight heatmap, a possum motif file and a logo,
       build a PWM from the logo FASTA file and, when it was built from at
       least 10 sites, record its information content and append it to
       ``<out_dir>/filters_meme.txt``.
    2. Start the ``memesuite`` docker container, run ``tomtom`` in it against
       ``Ray2013_rbp_RNA.meme`` with output in ``<out_dir>/tomtom``, then stop
       the container.
    3. Write ``<out_dir>/table.txt`` with one row per filter (consensus,
       annotation, information content, mean and std of the filter scores)
       and a score density plot per filter.
    4. Plot a filter-by-sequence heatmap to ``<out_dir>/filter_seqs.pdf``.

    Args:
        filter_weights: Filter parameters, indexed as
            ``filter_weights[f, :, :]``; ``shape[0]`` is the filter count.
        filter_outs: Filter activations, indexed as ``filter_outs[:, :, f]``.
            Its first axis is subset with ``sample_i`` when sampling.
        out_dir: Directory path used as the prefix of every output file.
        seqs: The sequences handed to ``meme_intro`` and ``plot_filter_logo``.
        sample_i: Optional collection of integer indices selecting which
            sequences (and matching ``filter_outs`` rows) to use. A falsy or
            empty value (the default ``0``) disables sampling.
    """
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('plot motif fig %s' % out_dir)

    # ``sample_i`` may be a list or an array: ``len`` sidesteps the ambiguous
    # truth value of multi-element numpy arrays, while the scalar default
    # falls back to plain truthiness.
    try:
        sampling = len(sample_i) > 0
    except TypeError:
        sampling = bool(sample_i)

    if sampling:
        print('sampling')
        # Pick from the ORIGINAL sequences (the previous code emptied the
        # list before iterating it) and in the same order in which
        # ``filter_outs[sample_i]`` picks rows, so both stay aligned.
        seqs = [seqs[ind] for ind in sample_i]
        filter_outs = filter_outs[sample_i]

    num_filters = filter_weights.shape[0]
    filter_size = 7  # filter_weights.shape[2]

    #################################################################
    # individual filter plots
    #################################################################
    # also save information contents
    filters_ic = []
    meme_out = meme_intro('%s/filters_meme.txt' % out_dir, seqs)

    # Make sure the MEME file handle is released even if a plot step fails.
    try:
        for f in range(num_filters):
            print('Filter %d' % f)

            # plot filter parameters as a heatmap
            structure_motifs.plot_filter_heat(
                filter_weights[f, :, :],
                '%s/filter%d_heat.pdf' % (out_dir, f))

            # write possum motif file
            structure_motifs.filter_possum(
                filter_weights[f, :, :], 'filter%d' % f,
                '%s/filter%d_possum.txt' % (out_dir, f), False)

            # plot weblogo of high scoring outputs
            structure_motifs.plot_filter_logo(
                filter_outs[:, :, f],
                filter_size,
                seqs,
                '%s/filter%d_logo' % (out_dir, f),
                maxpct_t=0.5)

            # make a PWM for the filter
            filter_pwm, nsites = structure_motifs.make_filter_pwm(
                '%s/filter%d_logo.fa' % (out_dir, f))

            if nsites < 10:
                # no information
                filters_ic.append(0)
            else:
                # compute and save information content
                filters_ic.append(structure_motifs.info_content(filter_pwm))

                # add to the meme motif file
                structure_motifs.meme_add(meme_out, f, filter_pwm, nsites,
                                          False)
    finally:
        meme_out.close()

    #################################################################
    # annotate filters
    #################################################################
    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters in ``out_dir`` intact.
    sp.check_output(['docker', 'start', 'memesuite'])
    try:
        # ``docker exec`` expects the container name before the command; the
        # earlier command omitted it, so docker took ``tomtom`` as the
        # container. ``-it`` is dropped because tomtom reads no stdin and a
        # TTY request fails when this runs without a terminal.
        sp.call([
            'docker', 'exec', 'memesuite',
            'tomtom', '-dist', 'pearson', '-thresh', '0.05', '-eps',
            '-oc', '%s/tomtom' % out_dir,
            '%s/filters_meme.txt' % out_dir,
            'Ray2013_rbp_RNA.meme',
        ])
    finally:
        # Stop the container even if launching tomtom failed.
        sp.check_output(['docker', 'stop', 'memesuite'])

    # read in annotations
    filter_names = structure_motifs.name_filters(
        num_filters, '%s/tomtom/tomtom.txt' % out_dir, 'Ray2013_rbp_RNA.meme')

    #################################################################
    # print a table of information
    #################################################################
    # ``write`` replaces the Python 2 only ``print >> file`` statement, which
    # raises TypeError at runtime on Python 3.
    with open('%s/table.txt' % out_dir, 'w') as table_out:
        # print header for later panda reading
        header_cols = ('', 'consensus', 'annotation', 'ic', 'mean', 'std')
        table_out.write('%3s  %19s  %10s  %5s  %6s  %6s\n' % header_cols)

        for f in range(num_filters):
            # collapse to a consensus motif
            consensus = structure_motifs.filter_motif(filter_weights[f, :, :])

            # grab annotation: second '_'-separated piece of the filter name
            annotation = '.'
            name_pieces = filter_names[f].split('_')
            if len(name_pieces) > 1:
                annotation = name_pieces[1]

            # plot density of filter output scores
            fmean, fstd = structure_motifs.plot_score_density(
                np.ravel(filter_outs[:, :, f]),
                '%s/filter%d_dens.pdf' % (out_dir, f))

            row_cols = (f, consensus, annotation, filters_ic[f], fmean, fstd)
            table_out.write(
                '%-3d  %19s  %10s  %5.2f  %6.4f  %6.4f\n' % row_cols)

    # Transpose each per-sequence output matrix before the heatmap.
    filter_outs = np.array([val.T for val in filter_outs])
    print(filter_outs.shape)

    # plot filter-sequence heatmap
    structure_motifs.plot_filter_seq_heat(filter_outs,
                                          '%s/filter_seqs.pdf' % out_dir)