コード例 #1
0
ファイル: genotype.py プロジェクト: schae234/pypeline
    def _run(self, _config, temp):
        """Write the sequence of each gene in the intervals file as FASTA.

        Intervals are read from the BED file self._intervals and grouped by
        (contig, name). The sequences of the intervals in a group are fetched
        from the reference FASTA self._reference, joined in order of start
        position, and reverse complemented if the intervals are located on
        the "-" strand. The records are written to "sequences.fasta" in the
        'temp' folder, and that file is then moved to self._outfile.

        Raises AssertionError if a gene has intervals on both strands.
        """
        def keyfunc(bed):
            return (bed.contig, bed.name, bed.start)

        seqs = {}
        fastafile = pysam.Fastafile(self._reference)
        try:
            with open(self._intervals) as bedfile:
                intervals = text.parse_lines_by_contig(bedfile, pysam.asBed()).items()
                for (contig, beds) in sorted(intervals):
                    # groupby only merges adjacent records, so the intervals
                    # must be ordered by name (and then start) beforehand
                    beds.sort(key = keyfunc)

                    for (gene, gene_beds) in itertools.groupby(beds, lambda x: x.name):
                        gene_beds = tuple(gene_beds)
                        seq = "".join(fastafile.fetch(contig, bed.start, bed.end)
                                      for bed in gene_beds)

                        if any((bed.strand == "-") for bed in gene_beds):
                            # Raised explicitly (rather than via 'assert') so
                            # that the check is not stripped by 'python -O'
                            if not all((bed.strand == "-") for bed in gene_beds):
                                raise AssertionError("Gene %r on contig %r has intervals on both strands"
                                                     % (gene, contig))
                            seq = sequences.reverse_complement(seq)
                        seqs[(contig, gene)] = seq
        finally:
            # Release the handle to the reference even if a step above failed
            fastafile.close()

        temp_file = os.path.join(temp, "sequences.fasta")
        with open(temp_file, "w") as out_file:
            # Records are ordered by (contig, gene), but named by gene only
            for ((_, gene), sequence) in sorted(seqs.items()):
                fasta.print_fasta(gene, sequence, out_file)

        move_file(temp_file, self._outfile)
コード例 #2
0
    def _run(self, _config, temp):
        """Write the sequence of each gene in the intervals file as FASTA.

        Intervals are read from the BED file self._intervals and grouped by
        (contig, name). The sequences of the intervals in a group are fetched
        from the reference FASTA self._reference, joined in order of start
        position, and reverse complemented if the intervals are located on
        the "-" strand. The records are written to "sequences.fasta" in the
        'temp' folder, and that file is then moved to self._outfile.

        Raises AssertionError if a gene has intervals on both strands.
        """
        def keyfunc(bed):
            return (bed.contig, bed.name, bed.start)

        seqs = {}
        fastafile = pysam.Fastafile(self._reference)
        try:
            with open(self._intervals) as bedfile:
                intervals = text.parse_lines_by_contig(bedfile,
                                                       pysam.asBed()).items()
                for (contig, beds) in sorted(intervals):
                    # groupby only merges adjacent records, so the intervals
                    # must be ordered by name (and then start) beforehand
                    beds.sort(key=keyfunc)

                    for (gene, gene_beds) in itertools.groupby(
                            beds, lambda x: x.name):
                        gene_beds = tuple(gene_beds)
                        seq = "".join(
                            fastafile.fetch(contig, bed.start, bed.end)
                            for bed in gene_beds)

                        if any((bed.strand == "-") for bed in gene_beds):
                            # Raised explicitly (rather than via 'assert') so
                            # that the check is not stripped by 'python -O'
                            if not all((bed.strand == "-")
                                       for bed in gene_beds):
                                raise AssertionError(
                                    "Gene %r on contig %r has intervals on "
                                    "both strands" % (gene, contig))
                            seq = sequences.reverse_complement(seq)
                        seqs[(contig, gene)] = seq
        finally:
            # Release the handle to the reference even if a step above failed
            fastafile.close()

        temp_file = os.path.join(temp, "sequences.fasta")
        with open(temp_file, "w") as out_file:
            # Records are ordered by (contig, gene), but named by gene only
            for ((_, gene), sequence) in sorted(seqs.items()):
                fasta.print_fasta(gene, sequence, out_file)

        move_file(temp_file, self._outfile)
コード例 #3
0
ファイル: fasta_test.py プロジェクト: schae234/pypeline
def test_print_fasta__multiple_lines():
    # Fifteen fragments are expected to be split into a line containing
    # ten fragments followed by a line containing the remaining five
    first_line = _SEQ_FRAG * 10
    second_line = _SEQ_FRAG * 5
    expected = ">foobar\n%s\n%s\n" % (first_line, second_line)

    output = StringIO.StringIO()
    print_fasta("foobar", _SEQ_FRAG * 15, output)
    assert_equal(output.getvalue(), expected)
コード例 #4
0
ファイル: fasta_test.py プロジェクト: schae234/pypeline
def test_print_fasta__complete_line_test():
    # Ten fragments are expected on a single line below the header
    sequence = _SEQ_FRAG * 10
    expected = ">barfoo\n%s\n" % (sequence, )

    output = StringIO.StringIO()
    print_fasta("barfoo", sequence, output)
    assert_equal(output.getvalue(), expected)
コード例 #5
0
ファイル: fasta_test.py プロジェクト: schae234/pypeline
def test_print_fasta__partial_line():
    output = StringIO.StringIO()
    print_fasta("foobar", _SEQ_FRAG, output)

    # A single fragment is expected on one line below the header
    assert_equal(output.getvalue(), ">foobar\n%s\n" % (_SEQ_FRAG, ))
コード例 #6
0
def test_print_fasta__multiple_lines():
    # Fifteen fragments are expected to be split into a line containing
    # ten fragments followed by a line containing the remaining five
    wrapped_lines = (_SEQ_FRAG * 10, _SEQ_FRAG * 5)
    expected = ">foobar\n%s\n%s\n" % wrapped_lines

    handle = StringIO.StringIO()
    print_fasta("foobar", _SEQ_FRAG * 15, handle)
    assert_equal(handle.getvalue(), expected)
コード例 #7
0
def test_print_fasta__complete_line_test():
    # Ten fragments are expected on a single line below the header
    full_line = _SEQ_FRAG * 10

    handle = StringIO.StringIO()
    print_fasta("barfoo", full_line, handle)
    assert_equal(handle.getvalue(), ">barfoo\n%s\n" % (full_line, ))
コード例 #8
0
def test_print_fasta__partial_line():
    # A single fragment is expected on one line below the header
    expected = ">foobar\n%s\n" % (_SEQ_FRAG, )

    handle = StringIO.StringIO()
    print_fasta("foobar", _SEQ_FRAG, handle)
    written = handle.getvalue()
    assert_equal(written, expected)
コード例 #9
0
ファイル: msa.py プロジェクト: schae234/pypeline
def print_msa(msa, file = None):
    """Write all records of a multiple sequence alignment using print_fasta.

    The MSA is first checked using validate_msa; each record is then passed
    to print_fasta, in sorted order of the keys of the MSA.

    Arguments:
      msa  -- Container that is indexed by name and yields names on iteration.
      file -- Handle passed on to print_fasta; if not specified (None), the
              sys.stdout that is current at the time of the call is used.
    """
    if file is None:
        # Looked up at call time; a 'sys.stdout' default in the signature is
        # evaluated once at definition, and ignores later redirections
        file = sys.stdout

    validate_msa(msa)
    for group in sorted(msa):
        print_fasta(group, msa[group], file)
コード例 #10
0
def print_msa(msa, file=None):
    """Write all records of a multiple sequence alignment using print_fasta.

    The MSA is first checked using validate_msa; each record is then passed
    to print_fasta, in sorted order of the keys of the MSA.

    Arguments:
      msa  -- Container that is indexed by name and yields names on iteration.
      file -- Handle passed on to print_fasta; if not specified (None), the
              sys.stdout that is current at the time of the call is used.
    """
    if file is None:
        # Looked up at call time; a 'sys.stdout' default in the signature is
        # evaluated once at definition, and ignores later redirections
        file = sys.stdout

    validate_msa(msa)
    for group in sorted(msa):
        print_fasta(group, msa[group], file)