Ejemplo n.º 1
0
def save_scores(fimo_dir):
    """Write flank-extended PhastCons scores for the FIMO sites in ``fimo_dir``.

    Reads ``fimo.txt`` from ``fimo_dir`` through ``fimo_to_sites``, widens
    every site by ``flank_length`` on both sides, queries ``phastcons_wig``
    for those intervals and writes two files into ``fimo_dir``:

    * ``phastcons.raw.txt``  -- the scores exactly as returned by the query.
    * ``phastcons.mean.txt`` -- ``np.nanmean`` of those scores along axis 0.

    When there are no sites, both files are created with ``touch`` and the
    wig reader is not queried at all.

    ``flank_length``, ``phastcons_wig``, ``fimo_to_sites`` and ``touch`` are
    taken from the enclosing scope.
    NOTE(review): the axis-0 mean presumably averages across sites, giving one
    value per position -- confirm against the shape ``query`` returns.

    Args:
        fimo_dir: Directory that contains ``fimo.txt`` and receives the
            output files.
    """
    fimo_file = os.path.join(fimo_dir, 'fimo.txt')
    fimo_sites = fimo_to_sites(fimo_file)

    raw_path = os.path.join(fimo_dir, 'phastcons.raw.txt')
    mean_path = os.path.join(fimo_dir, 'phastcons.mean.txt')

    # Guard clause: with no sites there is nothing to query, so only create
    # the (empty) output files that the non-empty path would have written.
    if not len(fimo_sites.index):
        touch(raw_path)
        touch(mean_path)
        return

    # Explicit copy so the flank arithmetic below can never write through to
    # (or warn about) ``fimo_sites``.
    subset = fimo_sites.loc[:, [
        'chrom', 'motifStartZeroBased', 'motifEndOneBased', 'strand'
    ]].copy()
    subset.loc[:, 'motifStartZeroBased'] = (
        subset['motifStartZeroBased'] - flank_length)
    subset.loc[:, 'motifEndOneBased'] = (
        subset['motifEndOneBased'] + flank_length)

    # One (chrom, start, end, strand) tuple per site, in column order.
    intervals = [tuple(x) for x in subset.to_records(index=False)]

    scores_phastcons = phastcons_wig.query(intervals)
    # nanmean ignores NaN entries in the returned scores.
    scores_phastcons_mean = np.nanmean(scores_phastcons, axis=0)

    np.savetxt(raw_path, scores_phastcons, fmt='%.4f')
    np.savetxt(mean_path, scores_phastcons_mean, fmt='%.4f')
Ejemplo n.º 2
0
 def test_fimostd(self):
     """Check the motif coordinates fimo_to_sites derives for a FIMO file
     with standard sequence names.

     For every row, the zero-based motif start must be ``start - 1`` and the
     one-based motif end must equal ``stop``.
     """
     fimo_path = os.path.abspath('tests/data/fimo.std.txt')
     sites = fimo_to_sites(fimo_path)
     for _, site in sites.iterrows():
         # These three values are looked up but never compared; the lookups
         # still fail the test if a column is missing from the row.
         record_id = site['sequence name']
         fimo_sequence = site['matched sequence']
         one_based_start = site['start']
         one_based_end = site['stop']  # 1-based end
         strand = site['strand']
         assert one_based_start - 1 == site['motifStartZeroBased']
         assert one_based_end == site['motifEndOneBased']
Ejemplo n.º 3
0
 def test_fimotosites(self):
     """Test fimo_to_sites against the sequences in ``self.meme_fasta``.

     Each FIMO row's matched sequence must equal the ``[start - 1, stop)``
     slice of the FASTA record named by the row; for any strand other than
     '+', the slice is reverse-complemented before comparing.
     """
     fimo_file = 'tests/data/expected_out/fimo_analysis/fimo.txt'
     # Parse inside a ``with`` block so the FASTA handle is closed; to_dict
     # consumes the whole parser before the block exits.
     with open(self.meme_fasta) as fasta_handle:
         record_dict = SeqIO.to_dict(SeqIO.parse(fasta_handle, 'fasta'))
     fimo_df = fimo_to_sites(os.path.abspath(fimo_file))
     for _, row in fimo_df.iterrows():
         record_id = row['sequence name']
         fimo_sequence = row['matched sequence']
         start = row['start'] - 1  # 0-based start
         end = row['stop']  # 1-based end
         strand = row['strand']
         fasta_sequence = record_dict[record_id].seq
         if strand == '+':
             assert str(fasta_sequence)[start:end] == fimo_sequence
         else:
             assert str(fasta_sequence[start:end].reverse_complement()) == fimo_sequence