def add_info(
        peaks, rip_targets, gtf_sep_cols, gtf_sep_cols_dict, peaks_fname,
        sequences, deseq=None, cufflinks=None,
        ):
    """Annotate a peaks dataframe and return it.

    Steps, in order:
      1. If both deseq and cufflinks are given, call add_gonad_expression.
      2. Add the genomic sequence of each peak (add_seqs).
      3. Score the binding site (yes/no FBE, -1/-2 C, number of FBEs).
      4. Add a column of overlap with the RIP-chip targets.
      5. Locate each peak in its gene.

    peaks_fname only selects which coverage column locate_in_gene is told
    to use: a name containing 'fbf1' or 'fbf2' picks the matching
    '*_reads_pos_of_max_coverage' column; otherwise locate_in_gene is
    called with its own default.

    gtf_sep_cols is accepted but not used by this function.
    """
    expression_available = not (deseq is None or cufflinks is None)
    if expression_available:
        # Compare with the list in Ortiz et al. of gonadal genes.
        add_gonad_expression(peaks, deseq, cufflinks)

    # add_seqs hands back the dataframe to keep working on.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Overlaps with the top 1350 unique RIP-chip targets?
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)

    # First matching pattern wins; 'fbf1' is tested before 'fbf2'.
    column_by_pattern = (
        ('fbf1', 'fbf1_reads_pos_of_max_coverage'),
        ('fbf2', 'fbf2_reads_pos_of_max_coverage'),
    )
    locate_kwargs = {}
    for pattern, column in column_by_pattern:
        if re.search(pattern, peaks_fname):
            locate_kwargs['use_this_column'] = column
            break
    locate_in_gene(gtf_sep_cols_dict, peaks, **locate_kwargs)

    return peaks
def add_info(peaks, rip_targets, gtf_sep_cols, gtf_sep_cols_dict,
             peaks_fname, sequences, deseq=None, cufflinks=None,
             ):
    """Add sequence, FBE, RIP-chip overlap and gene-location info to peaks.

    Takes a dataframe and returns the altered dataframe.

    When both deseq and cufflinks are supplied, add_gonad_expression is
    run first. The peaks file name decides which column is passed to
    locate_in_gene as use_this_column ('fbf1' is checked before 'fbf2');
    if neither appears in the name, locate_in_gene is called without it.

    gtf_sep_cols is part of the signature but is not read here.
    """
    if deseq is not None:
        if cufflinks is not None:
            # Compare with the list in Ortiz et al. of gonadal genes.
            add_gonad_expression(peaks, deseq, cufflinks)

    # Genomic sequence under each peak.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    # Yes/no FBE, -1/-2 C, number of FBEs.
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Overlap with the top 1350 unique RIP-chip targets.
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)

    # Pick the coverage column from the file name, if it names one.
    if re.search('fbf1', peaks_fname):
        coverage_column = 'fbf1_reads_pos_of_max_coverage'
    elif re.search('fbf2', peaks_fname):
        coverage_column = 'fbf2_reads_pos_of_max_coverage'
    else:
        coverage_column = None

    # Locate each peak in the gene.
    if coverage_column is None:
        locate_in_gene(gtf_sep_cols_dict, peaks)
    else:
        locate_in_gene(
            gtf_sep_cols_dict, peaks, use_this_column=coverage_column)
    return peaks
def add_minimal_info(peaks, rip_targets, peaks_fname, sequences):
    """Add sequence, FBE score and RIP-chip overlap to a peaks dataframe.

    Progress markers ('1', '2', '3', 'done') are printed before each step
    and at the end.

    peaks_fname is accepted but not used by this function.

    Returns the annotated dataframe. Earlier versions returned None and
    dropped the frame handed back by add_seqs; returning it means callers
    get the annotated data even if add_seqs does not modify its argument
    in place.
    """
    # Add genomic sequence in the peak.
    print('1')
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)
    # Get yes/no FBE, -1/-2 C, number of FBEs.
    print('2')
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Overlaps with the top 1350 unique RIP-chip targets?
    print('3')
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)
    print('done')
    return peaks
def add_info(peaks, filename, rip_targets, gtf_sep_cols, top_level_dir):
    """Annotate peaks, then write the FBE subset and a column subset.

    Runs, in order: get_sequences, score_binding_site,
    add_column_of_overlap (against rip_targets) and locate_in_gene
    (with gtf_sep_cols). Afterwards both writers are called with
    top_level_dir and a label equal to the base name of filename.

    Nothing is returned.
    """
    # Genomic sequence under each peak.
    subset_peaks_with_fbe.get_sequences(peaks)
    # Yes/no FBE, -1/-2 C, number of FBEs.
    subset_peaks_with_fbe.score_binding_site(peaks)
    # Overlap with the top 1350 unique RIP-chip targets.
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)
    # Position of each peak within its gene.
    locate_in_gene(gtf_sep_cols, peaks)

    # Both output files share the input file's base name as their label.
    output_label = os.path.basename(filename)
    subset_peaks_with_fbe.write_subset_with_fbe(
        peaks, top_level_dir, label=output_label)
    write_subset_of_columns(peaks, top_level_dir, label=output_label)
def add_minimal_info(peaks, rip_targets, peaks_fname, sequences):
    """Run the three basic annotation steps on a peaks dataframe.

    The steps are: add the genomic sequence of each peak, score the
    binding site, and add the RIP-chip overlap column. A progress marker
    ('1', '2', '3') is printed before each step and 'done' at the end.

    peaks_fname is part of the signature but is not read here.

    Returns the annotated dataframe. The function used to return None,
    which discarded the frame returned by add_seqs; it is now returned so
    the result does not depend on add_seqs working in place.
    """
    print('1')
    # Genomic sequence under each peak.
    peaks = subset_peaks_with_fbe.add_seqs(peaks, sequences)

    print('2')
    # Yes/no FBE, -1/-2 C, number of FBEs.
    subset_peaks_with_fbe.score_binding_site(peaks)

    print('3')
    # Overlap with the top 1350 unique RIP-chip targets.
    compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)

    print('done')
    return peaks
def add_seqs(combined, sequences):
    """Add a 'seq' column to combined and return combined.

    For every row, seq_from_iv is called with that row's chrm, left,
    right and strand values plus sequences; the results become the
    'seq' column. The dataframe is modified in place.
    """
    combined['seq'] = [
        seq_from_iv(chrm, left, right, strand, sequences)
        for chrm, left, right, strand in zip(
            combined.chrm, combined.left, combined.right, combined.strand)
    ]
    return combined


def write_subset_with_fbe(peaks, top_level_dir, label):
    """Write the rows of peaks with has_fbe == 1 to a tab-separated file.

    The output goes to 'with_fbe_<dir>/<label>' relative to the current
    working directory, where <dir> is os.path.dirname(top_level_dir).
    The output directory is created if it does not exist.
    """
    top_level_dir = os.path.dirname(top_level_dir)
    peaks_w_fbe = peaks[peaks['has_fbe'] == 1]
    out_dir = 'with_fbe_%s' % top_level_dir
    # os.makedirs instead of a shell 'mkdir': works for names containing
    # spaces or shell metacharacters and creates missing parent dirs.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    peaks_w_fbe.to_csv('%s/%s' % (out_dir, label), sep='\t')


if __name__ == '__main__':
    # Usage: script <top_level_dir>
    # Every <top_level_dir>/combined*.txt file is read, annotated and its
    # FBE-containing subset written out.
    top_level_dir = sys.argv[1]
    combined = {}
    rip_targets = compare_with_ripchip.get_ripchip_targets()
    for filename in glob.glob(top_level_dir + '/combined*.txt'):
        print(filename)
        peaks = pandas.read_csv(filename, sep='\t')
        combined[filename] = peaks
        get_sequences(peaks)
        score_binding_site(peaks)
        compare_with_ripchip.add_column_of_overlap(peaks, rip_targets)
        write_subset_with_fbe(
            peaks, top_level_dir, label=os.path.basename(filename))