def _run(self, _config, temp):
    """Write the reference sequence of every named BED region as FASTA.

    Reads the intervals listed in self._intervals, groups them by contig
    and name, and joins the matching pieces of self._reference in order
    of start position. Groups located on the "-" strand are reverse
    complemented. The records are written to "sequences.fasta" inside
    'temp', and that file is then passed to move_file() with
    self._outfile as the destination.

    _config -- not used by this method.
    temp    -- directory in which the output file is built.

    Raises AssertionError (while assertions are enabled) if the intervals
    of one group disagree on whether they are on the "-" strand.
    """
    def keyfunc(bed):
        return (bed.contig, bed.name, bed.start)

    seqs = {}
    fastafile = pysam.Fastafile(self._reference)
    try:
        with open(self._intervals) as bedfile:
            intervals = text.parse_lines_by_contig(bedfile, pysam.asBed())
            for (contig, beds) in sorted(intervals.items()):
                # groupby only merges adjacent records, so the records
                # have to be ordered by name (then start) beforehand.
                beds.sort(key=keyfunc)

                for (gene, gene_beds) in itertools.groupby(beds, lambda bed: bed.name):
                    gene_beds = tuple(gene_beds)
                    seq = "".join(fastafile.fetch(contig, bed.start, bed.end)
                                  for bed in gene_beds)

                    if any((bed.strand == "-") for bed in gene_beds):
                        assert all((bed.strand == "-") for bed in gene_beds), \
                            "mixed strands for %r on %r" % (gene, contig)
                        seq = sequences.reverse_complement(seq)

                    seqs[(contig, gene)] = seq
    finally:
        # Release the reference handle even if parsing or fetching fails.
        fastafile.close()

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        # Records are emitted in (contig, name) order.
        for ((_, gene), sequence) in sorted(seqs.items()):
            fasta.print_fasta(gene, sequence, out_file)

    move_file(temp_file, self._outfile)
def _run(self, _config, temp):
    """Write the reference sequence of every named BED region as FASTA.

    Reads the intervals listed in self._intervals, groups them by contig
    and name, and joins the matching pieces of self._reference in order
    of start position. Groups located on the "-" strand are reverse
    complemented. The records are written to "sequences.fasta" inside
    'temp', and that file is then passed to move_file() with
    self._outfile as the destination.

    _config -- not used by this method.
    temp    -- directory in which the output file is built.

    Raises AssertionError (while assertions are enabled) if the intervals
    of one group disagree on whether they are on the "-" strand.
    """
    def keyfunc(bed):
        return (bed.contig, bed.name, bed.start)

    seqs = {}
    fastafile = pysam.Fastafile(self._reference)
    try:
        with open(self._intervals) as bedfile:
            intervals = text.parse_lines_by_contig(bedfile, pysam.asBed())
            for (contig, beds) in sorted(intervals.items()):
                # groupby only merges adjacent records, so the records
                # have to be ordered by name (then start) beforehand.
                beds.sort(key=keyfunc)

                for (gene, gene_beds) in itertools.groupby(beds, lambda bed: bed.name):
                    gene_beds = tuple(gene_beds)
                    seq = "".join(fastafile.fetch(contig, bed.start, bed.end)
                                  for bed in gene_beds)

                    if any((bed.strand == "-") for bed in gene_beds):
                        assert all((bed.strand == "-") for bed in gene_beds), \
                            "mixed strands for %r on %r" % (gene, contig)
                        seq = sequences.reverse_complement(seq)

                    seqs[(contig, gene)] = seq
    finally:
        # Release the reference handle even if parsing or fetching fails.
        fastafile.close()

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        # Records are emitted in (contig, name) order.
        for ((_, gene), sequence) in sorted(seqs.items()):
            fasta.print_fasta(gene, sequence, out_file)

    move_file(temp_file, self._outfile)
def test_print_fasta__multiple_lines():
    # A sequence of 15 fragments is expected to be written as one line
    # holding 10 fragments followed by a line holding the remaining 5.
    output = StringIO.StringIO()
    print_fasta("foobar", _SEQ_FRAG * 15, output)

    wrapped_lines = (_SEQ_FRAG * 10, _SEQ_FRAG * 5)
    assert_equal(output.getvalue(), ">foobar\n%s\n%s\n" % wrapped_lines)
def test_print_fasta__complete_line_test():
    # A sequence of 10 fragments is expected to be written as a single
    # sequence line, with no additional line after it.
    full_line = _SEQ_FRAG * 10
    output = StringIO.StringIO()
    print_fasta("barfoo", full_line, output)

    assert_equal(output.getvalue(), ">barfoo\n%s\n" % (full_line,))
def test_print_fasta__partial_line():
    # A single fragment is expected to be written as one sequence line
    # terminated by a newline.
    output = StringIO.StringIO()
    print_fasta("foobar", _SEQ_FRAG, output)

    header_and_sequence = ">foobar\n%s\n" % (_SEQ_FRAG,)
    assert_equal(output.getvalue(), header_and_sequence)
def print_msa(msa, file=None):
    """Validate 'msa' and print each of its entries using print_fasta.

    msa  -- container indexed by name; it is checked with validate_msa
            before anything is printed, and its entries are printed in
            sorted order of their names.
    file -- handle passed on to print_fasta; defaults to the sys.stdout
            in effect at the time of the call.
    """
    if file is None:
        # Resolved per call rather than in the signature, so that a
        # sys.stdout replaced after import (e.g. by output capture or
        # redirection) is respected.
        file = sys.stdout

    validate_msa(msa)
    for group in sorted(msa):
        print_fasta(group, msa[group], file)
def print_msa(msa, file=None):
    """Validate 'msa' and print each of its entries using print_fasta.

    msa  -- container indexed by name; it is checked with validate_msa
            before anything is printed, and its entries are printed in
            sorted order of their names.
    file -- handle passed on to print_fasta; defaults to the sys.stdout
            in effect at the time of the call.
    """
    if file is None:
        # Resolved per call rather than in the signature, so that a
        # sys.stdout replaced after import (e.g. by output capture or
        # redirection) is respected.
        file = sys.stdout

    validate_msa(msa)
    for group in sorted(msa):
        print_fasta(group, msa[group], file)