Beispiel #1
0
def add_info(
    peaks,
    rip_targets,
    gtf_sep_cols,
    gtf_sep_cols_dict,
    peaks_fname,
    sequences,
    deseq=None,
    cufflinks=None,
):
    """Annotate a peaks dataframe with sequence, FBE, RIP-chip and location.

    Steps, in order:
      1. If both ``deseq`` and ``cufflinks`` are given, call
         ``add_gonad_expression`` (its return value is ignored).
      2. ``subset_peaks_with_fbe.add_seqs`` -- the result replaces ``peaks``.
      3. ``subset_peaks_with_fbe.score_binding_site``.
      4. ``compare_with_ripchip.add_column_of_overlap`` against
         ``rip_targets``.
      5. ``locate_in_gene``, told which column to use when the file name
         contains 'fbf1' or 'fbf2'.

    ``gtf_sep_cols`` is accepted but not used by this function.

    Takes a dataframe.
    Returns the altered dataframe.
    """
    have_expression_data = deseq is not None and cufflinks is not None
    if have_expression_data:
        # Compare with the list in Ortiz et al. of gonadal genes.
        add_gonad_expression(peaks, deseq, cufflinks)

    # Genomic sequence under each peak.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    # Yes/no FBE, -1/-2 C, number of FBEs.
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Overlap with the top 1350 unique RIP-chip targets.
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)

    # File-name tag -> column handed to locate_in_gene. 'fbf1' is tested
    # first, so it wins when a name contains both tags.
    column_for_tag = (
        ('fbf1', 'fbf1_reads_pos_of_max_coverage'),
        ('fbf2', 'fbf2_reads_pos_of_max_coverage'),
    )
    locate_options = {}
    for tag, column in column_for_tag:
        if re.search(tag, peaks_fname):
            locate_options['use_this_column'] = column
            break
    # With no tag matched, locate_in_gene falls back to its own default.
    locate_in_gene(gtf_sep_cols_dict, peaks, **locate_options)
    return peaks
Beispiel #2
0
def add_info(peaks, rip_targets, gtf_sep_cols,
             gtf_sep_cols_dict, peaks_fname, sequences,
             deseq=None, cufflinks=None,
             ):
    """Add sequence, FBE, RIP-chip overlap and gene-location info to peaks.

    The gonad-expression step only runs when both ``deseq`` and
    ``cufflinks`` are supplied. ``gtf_sep_cols`` is not used here.

    Takes a dataframe.
    Returns the altered dataframe (the value returned by
    ``subset_peaks_with_fbe.add_seqs``, after the later steps ran on it).
    """
    if not (deseq is None or cufflinks is None):
        # Compare with the list in Ortiz et al. of gonadal genes.
        add_gonad_expression(peaks, deseq, cufflinks)

    # Attach the genomic sequence of each peak.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    # FBE yes/no, -1/-2 C, FBE count.
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Is the peak among the top 1350 unique RIP-chip targets?
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)

    # Pick the coverage column from the file name; 'fbf1' takes precedence.
    if re.search('fbf1', peaks_fname):
        max_coverage_column = 'fbf1_reads_pos_of_max_coverage'
    elif re.search('fbf2', peaks_fname):
        max_coverage_column = 'fbf2_reads_pos_of_max_coverage'
    else:
        max_coverage_column = None

    # Locate each peak in the gene.
    if max_coverage_column is None:
        # No tag in the name: let locate_in_gene use its default column.
        locate_in_gene(gtf_sep_cols_dict, peaks)
    else:
        locate_in_gene(
            gtf_sep_cols_dict, peaks, use_this_column=max_coverage_column)
    return peaks
Beispiel #3
0
def add_minimal_info(peaks, rip_targets, peaks_fname, sequences):
    """Add sequence, binding-site and RIP-chip overlap info to peaks.

    Runs three steps in order, printing a progress marker ('1', '2', '3')
    before each one and 'done' at the end:
      1. ``subset_peaks_with_fbe.add_seqs(peaks, sequences)`` -- the result
         replaces ``peaks``.
      2. ``subset_peaks_with_fbe.score_binding_site(peaks)``.
      3. ``compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)``.

    Args:
        peaks: the peaks table handed to every step.
            NOTE(review): presumably a pandas DataFrame -- confirm
            against callers.
        rip_targets: passed through to ``add_column_of_overlap``.
        peaks_fname: not used by this function; kept so existing callers
            keep working.
        sequences: passed through to ``add_seqs``.

    Returns:
        The object returned by ``add_seqs``, after the remaining steps
        were applied to it. (Earlier versions returned nothing, so the
        rebinding of ``peaks`` was invisible to the caller.)
    """
    # Single-argument parenthesised print parses under both Python 2 and
    # Python 3 and writes the same text.
    print('1')
    # Add genomic sequence in the peak.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    print('2')
    # Get yes/no FBE, -1/-2 C, number of FBEs.
    subset_peaks_with_fbe.score_binding_site(peaks)
    print('3')
    # Overlaps with the top 1350 unique RIP-chip targets?
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)
    print('done')
    return peaks
Beispiel #4
0
def add_info(peaks, filename, rip_targets, gtf_sep_cols, top_level_dir):
    """Annotate peaks, then hand them to the two writer functions.

    Calls, in order: ``subset_peaks_with_fbe.get_sequences``,
    ``subset_peaks_with_fbe.score_binding_site``,
    ``compare_with_ripchip.add_column_of_overlap``, ``locate_in_gene``,
    ``subset_peaks_with_fbe.write_subset_with_fbe`` and
    ``write_subset_of_columns``. Both writers receive ``top_level_dir``
    and the base name of ``filename`` as their label.

    Returns nothing; the return values of the helpers are ignored.
    """
    # Genomic sequence for each peak.
    subset_peaks_with_fbe.get_sequences(peaks)
    # FBE yes/no, -1/-2 C, FBE count.
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Overlap with the top 1350 unique RIP-chip targets.
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)
    # Position of each peak within its gene.
    locate_in_gene(gtf_sep_cols, peaks)

    # Both output files are labelled with the input file's base name.
    output_label = os.path.basename(filename)
    subset_peaks_with_fbe.write_subset_with_fbe(
        peaks, top_level_dir, label=output_label)
    write_subset_of_columns(peaks, top_level_dir, label=output_label)
Beispiel #5
0
def add_minimal_info(peaks, rip_targets,
                     peaks_fname, sequences):
    """Add sequence, binding-site and RIP-chip overlap info to peaks.

    Prints a progress marker ('1', '2', '3') before each of the three
    steps and 'done' once all have run:
      1. ``subset_peaks_with_fbe.add_seqs(peaks, sequences)`` -- its
         result replaces ``peaks``.
      2. ``subset_peaks_with_fbe.score_binding_site(peaks)``.
      3. ``compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)``.

    Args:
        peaks: the peaks table handed to every step.
            NOTE(review): presumably a pandas DataFrame -- confirm
            against callers.
        rip_targets: passed through to ``add_column_of_overlap``.
        peaks_fname: not used by this function; kept so existing callers
            keep working.
        sequences: passed through to ``add_seqs``.

    Returns:
        The object returned by ``add_seqs``, after the remaining steps
        were applied to it. (Previously nothing was returned, so the
        rebinding of ``peaks`` never reached the caller.)
    """
    # print(...) with one argument is valid in Python 2 and Python 3 and
    # emits identical output.
    print('1')
    # Add genomic sequence in the peak.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    print('2')
    # Get yes/no FBE, -1/-2 C, number of FBEs.
    subset_peaks_with_fbe.score_binding_site(peaks)
    print('3')
    # Overlaps with the top 1350 unique RIP-chip targets?
    compare_with_ripchip.add_column_of_overlap(
        peaks, rip_targets)
    print('done')
    return peaks
Beispiel #6
0
def add_seqs(combined, sequences):
    """Store a per-row sequence in ``combined['seq']`` and return ``combined``.

    For every row, ``seq_from_iv`` is called with that row's ``chrm``,
    ``left``, ``right`` and ``strand`` values plus ``sequences``; the
    results are assigned, in row order, to the ``seq`` column.
    ``combined`` is modified in place and also returned.
    """
    intervals = zip(
        combined.chrm, combined.left, combined.right, combined.strand)
    combined['seq'] = [
        seq_from_iv(chrm, left, right, strand, sequences)
        for chrm, left, right, strand in intervals
    ]
    return combined


def write_subset_with_fbe(peaks, top_level_dir, label):
    """Write the rows of ``peaks`` with ``has_fbe == 1`` to a tab-separated file.

    The file is written to ``with_fbe_<dir>/<label>`` relative to the
    current working directory, where ``<dir>`` is
    ``os.path.dirname(top_level_dir)``. Note that ``dirname`` drops the
    last path component when ``top_level_dir`` has no trailing separator.
    The output directory is created if it does not exist yet.

    Args:
        peaks: table with a ``has_fbe`` column; must support boolean-mask
            indexing and ``to_csv`` (e.g. a pandas DataFrame).
        top_level_dir: path whose ``dirname`` names the output directory.
        label: file name of the output inside that directory.
    """
    top_level_dir = os.path.dirname(top_level_dir)
    out_dir = 'with_fbe_%s' % top_level_dir
    peaks_w_fbe = peaks[peaks['has_fbe'] == 1]
    if not os.path.exists(out_dir):
        # Create the directory through the os module instead of a shell
        # `mkdir` command: the path is not re-parsed by a shell (spaces and
        # metacharacters are safe) and missing parent directories are
        # created as well.
        os.makedirs(out_dir)
    peaks_w_fbe.to_csv('%s/%s' % (out_dir, label), sep='\t')


if __name__ == '__main__':
    # Script entry point. argv[1] is the directory to scan; every
    # '<dir>/combined*.txt' file is read as a tab-separated table,
    # annotated (sequences, binding-site score, RIP-chip overlap) and its
    # FBE-containing subset written out via write_subset_with_fbe.
    top_level_dir = sys.argv[1]
    combined = {}  # file name -> loaded table
    rip_targets = compare_with_ripchip.get_ripchip_targets()
    for filename in glob.glob(top_level_dir + '/combined*.txt'):
        print(filename)
        peaks_table = pandas.read_csv(filename, sep='\t')
        combined[filename] = peaks_table
        get_sequences(peaks_table)
        score_binding_site(peaks_table)
        compare_with_ripchip.add_column_of_overlap(peaks_table, rip_targets)
        write_subset_with_fbe(
            peaks_table, top_level_dir, label=os.path.basename(filename))