def save_scores(fimo_dir):
    """Write phastCons scores for the FIMO sites found in ``fimo_dir``.

    Reads ``fimo.txt`` from ``fimo_dir`` through ``fimo_to_sites``, widens
    every motif interval by ``flank_length`` on both sides, queries
    ``phastcons_wig`` with the widened intervals and writes two files into
    ``fimo_dir``:

    * ``phastcons.raw.txt``  -- the scores exactly as returned by the query
    * ``phastcons.mean.txt`` -- ``np.nanmean`` of those scores over axis 0

    When the FIMO table has no rows, both files are created empty with
    ``touch`` and the wig file is not queried at all.

    ``flank_length``, ``phastcons_wig``, ``fimo_to_sites`` and ``touch`` are
    resolved from the surrounding scope.

    Args:
        fimo_dir: Directory that contains ``fimo.txt`` and receives the
            output files.
    """
    fimo_file = os.path.join(fimo_dir, 'fimo.txt')
    raw_path = os.path.join(fimo_dir, 'phastcons.raw.txt')
    mean_path = os.path.join(fimo_dir, 'phastcons.mean.txt')

    fimo_sites = fimo_to_sites(fimo_file)

    # Nothing to score: leave empty placeholder files and skip the query.
    if not len(fimo_sites.index):
        touch(raw_path)
        touch(mean_path)
        return

    # Work on an explicit copy so widening the intervals never writes through
    # to (or warns about) the frame returned by fimo_to_sites.
    subset = fimo_sites.loc[:, [
        'chrom', 'motifStartZeroBased', 'motifEndOneBased', 'strand'
    ]].copy()
    subset['motifStartZeroBased'] = subset['motifStartZeroBased'] - flank_length
    subset['motifEndOneBased'] = subset['motifEndOneBased'] + flank_length

    # One (chrom, start, end, strand) tuple per site, in table order.
    intervals = [tuple(x) for x in subset.to_records(index=False)]
    scores_phastcons = phastcons_wig.query(intervals)
    scores_phastcons_mean = np.nanmean(scores_phastcons, axis=0)

    np.savetxt(raw_path, scores_phastcons, fmt='%.4f')
    np.savetxt(mean_path, scores_phastcons_mean, fmt='%.4f')
def test_fimostd(self):
    """Test fimo to sites with standard sequence names.

    For every row produced by ``fimo_to_sites`` the derived columns must
    agree with the raw FIMO coordinates: ``motifStartZeroBased`` equals
    ``start - 1`` and ``motifEndOneBased`` equals ``stop``.
    """
    fimo_file = 'tests/data/fimo.std.txt'
    fimo_df = fimo_to_sites(os.path.abspath(fimo_file))
    for _, row in fimo_df.iterrows():
        start = row['start']
        end = row['stop']  # 1-based end
        assert start - 1 == row['motifStartZeroBased'], \
            'motifStartZeroBased should be start - 1'
        assert end == row['motifEndOneBased'], \
            'motifEndOneBased should equal stop'
def test_fimotosites(self):
    """Test fimo_to_sites.

    Each matched sequence reported in the FIMO table must equal the slice
    ``[start - 1:stop]`` of the corresponding record in ``self.meme_fasta``;
    for sites not on the ``+`` strand the slice is reverse-complemented
    before the comparison.
    """
    fimo_file = 'tests/data/expected_out/fimo_analysis/fimo.txt'
    # Load all records while the handle is open so the file is closed
    # deterministically instead of being left to the garbage collector.
    with open(self.meme_fasta) as fasta_handle:
        record_dict = SeqIO.to_dict(SeqIO.parse(fasta_handle, 'fasta'))
    fimo_df = fimo_to_sites(os.path.abspath(fimo_file))
    for _, row in fimo_df.iterrows():
        record_id = row['sequence name']
        fimo_sequence = row['matched sequence']
        start = row['start'] - 1  # 0-based start
        end = row['stop']  # 1-based end
        strand = row['strand']
        fasta_sequence = record_dict[record_id].seq
        if strand == '+':
            assert str(fasta_sequence)[start:end] == fimo_sequence
        else:
            assert str(
                fasta_sequence[start:end].reverse_complement()
            ) == fimo_sequence