Esempio n. 1
0
 def test_wellSeparatedExons(self):
     """Expect three exons after the well-separated filter is applied.

     Feeds the ``query_blastn_output3.csv`` table found under
     ``self.cwd``/BLAST to ``BLAST.getLargestExon`` and then passes the
     result through ``BLAST.wellSeparatedExons``.
     """
     blast_table = os.path.join(self.cwd, "BLAST", "query_blastn_output3.csv")
     candidates = BLAST.getLargestExon(blast_table, E_value=0.001, ident=98, exon_len=300)
     separated = BLAST.wellSeparatedExons(candidates)
     self.assertEqual(len(separated), 3)
#!/usr/bin/env python

import os;
from pyphylogenomics import OrthoDB
from pyphylogenomics import BLAST
from pyphylogenomics import MUSCLE


"""
As stated before, we prefer long exons for each of the candidate genes ( > 300
nucleotides):
"""
exons = BLAST.getLargestExon("data/pulled_seqs_blastn_out.csv", E_value=0.001, ident=98, exon_len=300)


"""
Some small segments of sequences might be similar to non-homologous regions of
the genome. We will use the function eraseFalsePosi to keep those matches of
longest length:
"""
exons = BLAST.eraseFalsePosi(exons) # Drop presumable false positives.


"""
Ideally we want exons that are not too close to each other in the genome to
avoid gene linkage. So we will keep only those exons that are apart by 810
kilobases:
"""
exons = BLAST.wellSeparatedExons(exons) # Keep exons separated by > 810KB

Esempio n. 3
0
 def test_eraseFalsePosi(self):
     """Expect three exons to remain after ``BLAST.eraseFalsePosi``.

     The input is the ``query_blastn_out.csv`` table located under
     ``self.cwd``, first reduced by ``BLAST.getLargestExon``.
     """
     blast_table = self.cwd + "/BLAST/query_blastn_out.csv"
     candidates = BLAST.getLargestExon(blast_table, E_value=0.001, ident=98, exon_len=300)
     kept = BLAST.eraseFalsePosi(candidates)
     self.assertEqual(len(kept), 3)
Esempio n. 4
0
 def test_getLargestExon_output_has_headers(self):
     """Expect ``BLAST.getLargestExon`` to return 38 entries.

     The input is the ``query_blastn_output2.csv`` table located under
     ``self.cwd``.
     """
     blast_table = self.cwd + "/BLAST/query_blastn_output2.csv"
     found = BLAST.getLargestExon(blast_table, E_value=0.001, ident=98, exon_len=300)
     self.assertEqual(len(found), 38)