def setUpClass(cls):
    """Load the reference sequences shared by every test of the class.

    Stores the amino-acid alphabet in ``cls.alphabet`` and the list of all
    sequences read from the packaged ``.faa`` file in ``cls.references``.
    """
    amino = Alphabet.amino()
    cls.alphabet = amino
    references_path = pkg_resources.resource_filename(
        "tests", "data/seqs/938293.PRJEB85.HG003687.faa"
    )
    with SequenceFile(references_path) as handle:
        # configure the reader with the class alphabet before reading
        handle.set_digital(amino)
        cls.references = list(handle)
def test_pksi(self):
    """Compare `pyhmmer.phmmer` results with the ``A0A089QRB9.tbl`` table.

    The last sequence of ``PKSI.faa`` is searched against every sequence
    of the same file. Each (hit, domain) pair, in sorted hit order, is then
    checked against the matching non-comment line of the reference table.
    The test fails if the table and the results have different lengths.
    """
    alphabet = Alphabet.amino()
    path = pkg_resources.resource_filename(__name__, "data/seqs/PKSI.faa")
    with SequenceFile(path) as seqs_file:
        seqs_file.set_digital(alphabet)
        seqs = list(seqs_file)

    # only the last sequence is used as a query, so a single result is read
    hits = next(pyhmmer.phmmer(seqs[-1:], seqs, cpus=1))
    hits.sort()

    with self.table("A0A089QRB9.tbl") as table:
        lines = (line for line in table if not line.startswith("#"))
        pairs = ((hit, domain) for hit in hits for domain in hit.domains)
        for line, pair in itertools.zip_longest(lines, pairs):
            # `zip_longest` pads the shorter iterator with None: check for
            # the padding *before* touching either value, so that a length
            # mismatch is reported as a test failure and not as a
            # TypeError / AttributeError raised while unpacking or parsing
            self.assertIsNot(line, None)
            self.assertIsNot(pair, None)
            hit, domain = pair

            # columns are separated by runs of spaces; drop the empty chunks
            fields = [field for field in line.strip().split(" ") if field]

            # hit-level values
            self.assertEqual(hit.name.decode(), fields[0])
            self.assertAlmostEqual(hit.score, float(fields[7]), delta=0.1)
            self.assertAlmostEqual(hit.bias, float(fields[8]), delta=0.1)
            self.assertAlmostEqual(hit.evalue, float(fields[6]), delta=0.1)

            # domain-level values
            self.assertAlmostEqual(domain.i_evalue, float(fields[12]), delta=0.1)
            self.assertAlmostEqual(domain.score, float(fields[13]), delta=0.1)

            # alignment and envelope coordinates must match exactly
            self.assertEqual(domain.alignment.hmm_from, int(fields[15]))
            self.assertEqual(domain.alignment.hmm_to, int(fields[16]))
            self.assertEqual(domain.alignment.target_from, int(fields[17]))
            self.assertEqual(domain.alignment.target_to, int(fields[18]))
            self.assertEqual(domain.env_from, int(fields[19]))
            self.assertEqual(domain.env_to, int(fields[20]))
def test_consensus(self):
    """Check `HMM.consensus` on a blank HMM and on the fixture HMM."""
    # if not set, the consensus is None
    blank = HMM(100, Alphabet.amino())
    self.assertIs(blank.consensus, None)

    # if the HMM is fully configured, the consensus should be as
    # long as the number of nodes
    consensus = self.hmm.consensus
    self.assertEqual(len(consensus), self.hmm.M)
def test_no_queries(self):
    """Check that `pyhmmer.phmmer` yields nothing for an empty query list."""
    amino = Alphabet.amino()
    targets_path = pkg_resources.resource_filename(__name__, "data/seqs/PKSI.faa")
    with SequenceFile(targets_path) as reader:
        reader.set_digital(amino)
        targets = list(reader)

    results = pyhmmer.phmmer([], targets, cpus=1)
    # `next` falls back to the None default only if the iterator is empty
    first = next(results, None)
    self.assertIs(None, first)
def test_background_error(self):
    """Check that `get_hits` raises `ValueError` on an alphabet mismatch."""
    # Errors occurring in worker threads must be recovered and raised in
    # the main thread; a common one is mismatching the HMM and the
    # sequence alphabets, which is what a DNA target triggers here.
    dna_targets = [TextSequence().digitize(Alphabet.dna())]
    with self.hmm_file("PF02826") as hmm_file:
        query = next(hmm_file)
    with self.assertRaises(ValueError):
        self.get_hits(query, dna_targets)
def test_build_protein(self):
    """Check `Builder.build` on a single digitized protein sequence."""
    amino = Alphabet.amino()
    builder = Builder(alphabet=amino)
    query = self.proteins[1].digitize(amino)
    hmm, profile, opt = builder.build(query, Background(amino))

    # the HMM takes its name from the sequence it was built from
    self.assertEqual(hmm.name, query.name)
    # every object returned by the builder shares the builder alphabet
    for built in (hmm, profile, opt):
        self.assertEqual(built.alphabet, amino)
def test_build_dna(self):
    """Check `Builder.build` on the digitized DNA fixture sequence."""
    dna_abc = Alphabet.dna()
    builder = Builder(alphabet=dna_abc)
    query = self.dna.digitize(dna_abc)
    hmm, profile, opt = builder.build(query, Background(dna_abc))

    # the HMM takes its name from the sequence it was built from
    self.assertEqual(hmm.name, self.dna.name)
    # every object returned by the builder shares the builder alphabet
    for built in (hmm, profile, opt):
        self.assertEqual(built.alphabet, dna_abc)
    # the new HMM must have as many nodes as the `ecori` fixture
    self.assertEqual(hmm.M, self.ecori.M)
def test_alphabet_mismatch(self):
    """Check that `Pipeline.scan_seq` rejects mismatching alphabets."""
    pipeline = Pipeline(alphabet=Alphabet.dna())

    # mismatch between pipeline alphabet and query alphabet
    # (presumably `self.alphabet` is not DNA; set up outside this method)
    foreign_query = TextSequence(sequence="IRGIY").digitize(self.alphabet)
    with self.assertRaises(AlphabetMismatch):
        pipeline.scan_seq(foreign_query, self.hmms)

    # mismatch between pipeline alphabet and database alphabet
    dna_query = TextSequence(sequence="ATGC").digitize(pipeline.alphabet)
    with self.assertRaises(AlphabetMismatch):
        pipeline.scan_seq(dna_query, self.hmms)
def test_accession(self):
    """Check that `HMM.accession` can be read, set, emptied and cleared."""
    abc = Alphabet.amino()
    hmm = HMM(100, abc)

    # a new HMM has no accession
    self.assertIs(hmm.accession, None)

    hmm.accession = b"TST001"
    self.assertEqual(hmm.accession, b"TST001")

    # an empty accession must be returned as empty bytes
    hmm.accession = b""
    self.assertEqual(hmm.accession, b"")

    # clearing must give back None itself: compare by identity, like the
    # first check, rather than by equality
    hmm.accession = None
    self.assertIs(hmm.accession, None)
def test_name(self):
    """Check that `HMM.name` can be read, set, emptied and cleared."""
    abc = Alphabet.amino()
    hmm = HMM(100, abc)

    # a new HMM has no name
    self.assertIs(hmm.name, None)

    hmm.name = b"Test"
    self.assertEqual(hmm.name, b"Test")

    # an empty name must be returned as empty bytes
    hmm.name = b""
    self.assertEqual(hmm.name, b"")

    # clearing must give back None itself: compare by identity, like the
    # first check, rather than by equality
    hmm.name = None
    self.assertIs(hmm.name, None)
def test_build_msa_dna(self):
    """Check `Builder.build_msa` on ``3box.sto`` against ``3box.hmm``."""
    dna_abc = Alphabet.dna()
    builder = Builder(alphabet=dna_abc)
    msa_path = os.path.join(self.testdata, "3box.sto")
    hmm_path = os.path.join(self.testdata, "3box.hmm")

    with MSAFile(msa_path) as msa_file:
        msa_file.set_digital(dna_abc)
        alignment = next(msa_file)
        # the name is assigned explicitly so it can be checked on the HMM
        alignment.name = b"3box"

    with HMMFile(hmm_path) as hmm_file:
        expected = next(hmm_file)

    hmm, profile, opt = builder.build_msa(alignment, Background(dna_abc))
    self.assertEqual(hmm.name, b"3box")
    self.assertEqual(hmm.M, expected.M)
def test_eq(self):
    """Check equality of the fixture HMM with itself, a copy and others."""
    # equal to itself and to a copy of itself
    self.assertEqual(self.hmm, self.hmm)
    duplicate = self.hmm.copy()
    self.assertEqual(self.hmm, duplicate)

    # different from a blank HMM and from an object of another type
    blank = HMM(100, Alphabet.amino())
    self.assertNotEqual(self.hmm, blank)
    self.assertNotEqual(self.hmm, 1)
def test_copy(self):
    """Check that `Builder.copy` keeps the alphabet and the seed."""
    original = Builder(alphabet=Alphabet.amino())
    duplicate = original.copy()
    self.assertEqual(original.alphabet, duplicate.alphabet)
    self.assertEqual(original.seed, duplicate.seed)