Python FastaParser.entries 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: reademptionlib.fasta

클래스/타입: FastaParser

메소드/함수: entries

hotexamples.com에서의 예제들: 4

Python FastaParser.entries - 예제 4개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python reademptionlib.fasta.FastaParser.entries의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FastaParser(4)

entries(3)

single_entry_file_header(3)

header_id(2)

예제 #1

파일 보기

class TestFastaParser(unittest.TestCase):
    """Unit tests for FastaParser (entries and the header helpers)."""

    def setUp(self):
        # A fresh parser and fixture container for every test method.
        self.fasta_parser = FastaParser()
        self.example_data = ExampleData()

    def test_parse_1(self):
        # entries() on the multi-record fixture must produce exactly
        # these (header, sequence) pairs, in this order.
        expected_entries = [
            ('test_1 a random sequence', 'TTTAGAAATTACACA'),
            ('test_2 another random sequence', 'ACGAGAAATTAAATTAAATT'),
            ('test_3 another random sequence', 'TAGAGACATTGGATTTTATT'),
        ]
        handle = StringIO(self.example_data.fasta_seqs_1)
        parsed_entries = list(self.fasta_parser.entries(handle))
        self.assertEqual(parsed_entries, expected_entries)

    def test_parse_empty_file(self):
        # An empty input must produce no entries at all.
        handle = StringIO("")
        parsed_entries = list(self.fasta_parser.entries(handle))
        self.assertEqual(parsed_entries, [])

    def test_single_entry_file_header(self):
        # The header of the single-record fixture is returned as-is.
        handle = StringIO(self.example_data.fasta_seqs_2)
        header = self.fasta_parser.single_entry_file_header(handle)
        self.assertEqual(header, "test_4 a random sequence")

    def test_header_id_1(self):
        # Space-separated header: the expected ID is the leading token.
        seq_id = self.fasta_parser.header_id("seq_10101 An important protein")
        self.assertEqual(seq_id, "seq_10101")

    def test_header_id_2(self):
        # Tab-separated header: the expected ID is the leading token.
        seq_id = self.fasta_parser.header_id(
            "seq_10101\tAn important protein")
        self.assertEqual(seq_id, "seq_10101")

예제 #2

파일 보기

파일: test_fasta.py 프로젝트: DiarmaidTobin/READemption

def test_fasta_parser():
    """Check FastaParser.entries, single_entry_file_header and header_id."""
    parser = FastaParser()

    # Fixture with three records, each sequence wrapped over several lines.
    multi_record_fasta = """>test_1 a random sequence
TTTAG
AAATT
ACACA
>test_2 another random sequence
ACGAG
AAATT
AAATT
AAATT
>test_3 another random sequence
TAGAG
ACATT
GGATT
TTATT
"""
    # Fixture with exactly one record.
    single_record_fasta = """>test_4 a random sequence
TTTAG
AAATT
ACACA
"""

    # Multi-record input: expect one (header, sequence) pair per record.
    expected_entries = [
        ('test_1 a random sequence', 'TTTAGAAATTACACA'),
        ('test_2 another random sequence', 'ACGAGAAATTAAATTAAATT'),
        ('test_3 another random sequence', 'TAGAGACATTGGATTTTATT'),
    ]
    parsed_entries = list(parser.entries(StringIO(multi_record_fasta)))
    assert parsed_entries == expected_entries

    # Empty input: expect no entries.
    assert list(parser.entries(StringIO(""))) == []

    # Single-record input: expect its header line without the ">".
    header = parser.single_entry_file_header(StringIO(single_record_fasta))
    assert header == "test_4 a random sequence"

    # Header IDs: same expected ID for a space- and a tab-separated header.
    for raw_header in ("seq_10101 An important protein",
                       "seq_10101\tAn important protein"):
        assert parser.header_id(raw_header) == "seq_10101"

예제 #3

파일 보기

파일: readalignerstats.py 프로젝트: LeiLiSysBio/READemption

class ReadAlignerStats(object):
    """Collect alignment statistics per reference and in total.

    Alignments are read from a BAM file through pysam; unaligned reads
    are counted from a FASTA file through FastaParser.
    """

    def __init__(self):
        self.fasta_parser = FastaParser()

    def count(self, read_alignment_result_bam_path, unaligned_reads_path):
        """Compute and return the statistics dictionary.

        The returned dict has the keys "stats_per_reference" and
        "stats_total"; "stats_total" also receives the entry
        "no_of_unaligned_reads".
        """
        self._stats = {}
        # Must run first: it creates self._stats["stats_total"], which
        # _count_unaligned_reads writes into.
        self._count_aligned_reads_and_alignments(
            read_alignment_result_bam_path)
        self._count_unaligned_reads(unaligned_reads_path)
        return self._stats

    def _count_unaligned_reads(self, unaligned_read_paths):
        """Store the number of FASTA entries of the file in stats_total."""
        with open(unaligned_read_paths) as fasta_fh:
            self._stats["stats_total"][
                "no_of_unaligned_reads"] = self._count_fasta_entries(fasta_fh)

    def _count_fasta_entries(self, fasta_fh):
        """Return the number of entries the FASTA parser yields."""
        # sum() over a generator avoids relying on reduce(), which is
        # not a builtin in Python 3.
        return sum(1 for _ in self.fasta_parser.entries(fasta_fh))

    def _count_aligned_reads_and_alignments(
            self, read_alignment_result_bam_path):
        """Fill self._stats from the alignments in the given BAM file.

        Exits the process with status 2 if counting an alignment raises
        a KeyError.
        """
        stats_per_ref = defaultdict(dict)
        # Handed to _count_alignment, which currently does not use it.
        no_of_hits_per_read_freq = {}
        bam = pysam.Samfile(read_alignment_result_bam_path)
        try:
            for ref_id in bam.references:
                self._init_counting_dict(stats_per_ref, ref_id)
            for entry in bam.fetch():
                ref_id = bam.getrname(entry.tid)
                try:
                    self._count_alignment(
                        entry, ref_id, stats_per_ref,
                        no_of_hits_per_read_freq)
                except KeyError:
                    sys.stderr.write(
                        "SAM entry with unspecified reference found! "
                        "Stoping\n")
                    sys.exit(2)
        finally:
            # Release the file handle even on the sys.exit() path.
            bam.close()
        self._stats["stats_per_reference"] = stats_per_ref
        for stats in stats_per_ref.values():
            stats["no_of_hits_per_read_and_freqs"] = self._calc_down_to_read(
                stats["no_of_hits_per_read_and_freqs"])
        self._stats["stats_total"] = self._sum_countings(stats_per_ref)

    def _sum_countings(self, stats_per_ref):
        """Sum the per-reference statistics into one total dictionary.

        int/float values are added up; plain dict values are merged by
        adding the frequencies of equal keys.
        """
        total_stats = {}
        for stats in stats_per_ref.values():
            for attribute, value in stats.items():
                if type(value) in (int, float):
                    total_stats[attribute] = (
                        total_stats.get(attribute, 0) + value)
                # NOTE(review): the exact type check skips defaultdict
                # values such as "alignment_length_and_freqs", so they
                # never reach the totals - confirm this is intended.
                elif type(value) is dict:
                    merged = total_stats.setdefault(attribute, {})
                    for value_int, freq in value.items():
                        merged[value_int] = merged.get(value_int, 0) + freq
        return total_stats

    def _calc_down_to_read(self, no_of_hits_per_read_freq):
        """Normalize alignment-based frequencies down to reads.

        As the frequencies were determined via the alignments, each
        frequency is divided by its number of hits per read.
        """
        return {
            no_of_hits_per_read: freq / no_of_hits_per_read
            for no_of_hits_per_read, freq in no_of_hits_per_read_freq.items()
        }

    def _init_counting_dict(self, stats_per_ref, ref_id):
        """Create the zero-initialized counters for one reference."""
        counters = defaultdict(float)
        # Create the keys explicitly so they exist even for references
        # without any alignment.
        for key in ("no_of_alignments",
                    "no_of_aligned_reads",
                    "no_of_split_alignments",
                    "no_of_uniquely_aligned_reads"):
            counters[key] = 0.0
        counters["alignment_length_and_freqs"] = defaultdict(int)
        counters["no_of_hits_per_read_and_freqs"] = defaultdict(int)
        stats_per_ref[ref_id] = counters

    def _count_alignment(self, entry, ref_id, stats_per_ref,
                         no_of_hits_per_read_freq):
        """Add one alignment entry to the counters of its reference.

        Reads the "NH" tag (number of hits) and the optional "XL" tag
        (number of splits, default 1) of the entry. Raises KeyError if
        "NH" is missing or the reference has no counters.
        """
        entry_tags_dict = dict(entry.tags)
        no_of_hits = entry_tags_dict["NH"]
        # Consider split reads: each part contributes only a fraction.
        no_of_splits = float(entry_tags_dict.get("XL", 1))
        fraction = 1.0 / no_of_splits
        ref_stats = stats_per_ref[ref_id]
        ref_stats["no_of_hits_per_read_and_freqs"][no_of_hits] += 1
        if "XL" in entry_tags_dict:
            ref_stats["no_of_split_alignments"] += fraction
        ref_stats["no_of_alignments"] += fraction
        ref_stats["no_of_aligned_reads"] += 1.0 / (
            float(no_of_hits) * no_of_splits)
        if no_of_hits == 1:
            ref_stats["no_of_uniquely_aligned_reads"] += fraction
        ref_stats["alignment_length_and_freqs"][entry.alen] += 1

예제 #4

파일 보기

class ReadAlignerStats(object):
    """Collect alignment statistics per reference and in total.

    Alignments are read from a BAM file through pysam; unaligned reads
    are counted from a FASTA file through FastaParser.
    """

    def __init__(self):
        self.fasta_parser = FastaParser()

    def count(self, read_alignment_result_bam_path, unaligned_reads_path):
        """Compute and return the statistics dictionary.

        The returned dict has the keys "stats_per_reference" and
        "stats_total". Unless unaligned_reads_path is the string "NA",
        the unaligned reads are counted as well.
        """
        self._stats = {}
        # Must run first: it creates self._stats["stats_total"], which
        # _count_unaligned_reads writes into.
        self._count_aligned_reads_and_alignments(
            read_alignment_result_bam_path)
        if unaligned_reads_path != "NA":
            self._count_unaligned_reads(unaligned_reads_path)
        return self._stats

    def _count_unaligned_reads(self, unaligned_read_paths):
        """Store the number of FASTA entries of the file in stats_total.

        Does nothing if the path is not an existing file.
        """
        if not os.path.isfile(unaligned_read_paths):
            return
        with open(unaligned_read_paths) as fasta_fh:
            self._stats["stats_total"][
                "no_of_unaligned_reads"] = self._count_fasta_entries(fasta_fh)

    def _count_fasta_entries(self, fasta_fh):
        """Return the number of entries the FASTA parser yields."""
        # sum() over a generator avoids relying on reduce(), which is
        # not a builtin in Python 3.
        return sum(1 for _ in self.fasta_parser.entries(fasta_fh))

    def _count_aligned_reads_and_alignments(self,
                                            read_alignment_result_bam_path):
        """Fill self._stats from the alignments in the given BAM file.

        Exits the process with status 2 if counting an alignment raises
        a KeyError.
        """
        stats_per_ref = defaultdict(dict)
        # Handed to _count_alignment, which currently does not use it.
        no_of_hits_per_read_freq = {}
        bam = pysam.Samfile(read_alignment_result_bam_path)
        try:
            for ref_id in bam.references:
                self._init_counting_dict(stats_per_ref, ref_id)
            for entry in bam.fetch():
                ref_id = bam.get_reference_name(entry.tid)
                try:
                    self._count_alignment(entry, ref_id, stats_per_ref,
                                          no_of_hits_per_read_freq)
                except KeyError:
                    sys.stderr.write(
                        "SAM entry with unspecified reference found! "
                        "Stoping\n")
                    sys.exit(2)
        finally:
            # Release the file handle even on the sys.exit() path.
            bam.close()
        self._stats["stats_per_reference"] = stats_per_ref
        for stats in stats_per_ref.values():
            stats["no_of_hits_per_read_and_freqs"] = self._calc_down_to_read(
                stats["no_of_hits_per_read_and_freqs"])
        self._stats["stats_total"] = self._sum_countings(stats_per_ref)

    def _sum_countings(self, stats_per_ref):
        """Sum the per-reference statistics into one total dictionary.

        int/float values are added up; plain dict values are merged by
        adding the frequencies of equal keys.
        """
        total_stats = {}
        for stats in stats_per_ref.values():
            for attribute, value in stats.items():
                if type(value) in (int, float):
                    total_stats[attribute] = (
                        total_stats.get(attribute, 0) + value)
                # NOTE(review): the exact type check skips defaultdict
                # values such as "alignment_length_and_freqs", so they
                # never reach the totals - confirm this is intended.
                elif type(value) is dict:
                    merged = total_stats.setdefault(attribute, {})
                    for value_int, freq in value.items():
                        merged[value_int] = merged.get(value_int, 0) + freq
        return total_stats

    def _calc_down_to_read(self, no_of_hits_per_read_freq):
        """Normalize alignment-based frequencies down to reads.

        As the frequencies were determined via the alignments, each
        frequency is divided by its number of hits per read.
        """
        return {
            no_of_hits_per_read: freq / no_of_hits_per_read
            for no_of_hits_per_read, freq in no_of_hits_per_read_freq.items()
        }

    def _init_counting_dict(self, stats_per_ref, ref_id):
        """Create the zero-initialized counters for one reference."""
        counters = defaultdict(float)
        # Create the keys explicitly so they exist even for references
        # without any alignment.
        for key in ("no_of_alignments",
                    "no_of_aligned_reads",
                    "no_of_split_alignments",
                    "no_of_uniquely_aligned_reads"):
            counters[key] = 0.0
        counters["alignment_length_and_freqs"] = defaultdict(int)
        counters["no_of_hits_per_read_and_freqs"] = defaultdict(int)
        stats_per_ref[ref_id] = counters

    def _count_alignment(self, entry, ref_id, stats_per_ref,
                         no_of_hits_per_read_freq):
        """Add one alignment entry to the counters of its reference.

        Reads the "NH" tag (number of hits) and the optional "XL" tag
        (number of splits, default 1) of the entry. Raises KeyError if
        "NH" is missing or the reference has no counters.
        """
        entry_tags_dict = dict(entry.tags)
        no_of_hits = entry_tags_dict["NH"]
        # Consider split reads: each part contributes only a fraction.
        no_of_splits = float(entry_tags_dict.get("XL", 1))
        fraction = 1.0 / no_of_splits
        ref_stats = stats_per_ref[ref_id]
        ref_stats["no_of_hits_per_read_and_freqs"][no_of_hits] += 1
        if "XL" in entry_tags_dict:
            ref_stats["no_of_split_alignments"] += fraction
        ref_stats["no_of_alignments"] += fraction
        ref_stats["no_of_aligned_reads"] += 1.0 / (
            float(no_of_hits) * no_of_splits)
        if no_of_hits == 1:
            ref_stats["no_of_uniquely_aligned_reads"] += fraction
        ref_stats["alignment_length_and_freqs"][entry.query_length] += 1