Example #1
0
    def test_embl_cds_interaction(self):
        """Parse the CDS features of an EMBL file and check the first entry."""
        scanner = Scanner.EmblScanner()

        # Consume the parser output inside the ``with`` block, i.e. before
        # the file handle is closed.
        with open("EMBL/AE017046.embl") as embl_handle:
            cds_entries = list(scanner.parse_cds_features(embl_handle))

        # Ten CDS entries are expected from this file.
        self.assertEqual(len(cds_entries), 10)

        # Identifier and description of the first CDS entry.
        first_cds = cds_entries[0]
        self.assertEqual(first_cds.id, 'AAS58758.1')
        self.assertEqual(first_cds.description, 'putative transposase')
Example #2
0
    def test_embl_record_interaction(self):
        """Parse full records, features included, from an EMBL file."""
        scanner = Scanner.EmblScanner()

        # Consume the parser output inside the ``with`` block, i.e. before
        # the file handle is closed.
        with open("EMBL/AE017046.embl") as embl_handle:
            records = list(scanner.parse_records(embl_handle, do_features=True))

        # Exactly one record is expected from this file.
        self.assertEqual(len(records), 1)

        record = records[0]
        self.assertEqual(record.id, 'AE017046.1')

        expected_description = ('Yersinia pestis biovar Microtus '
                                'str. 91001 plasmid pPCP1, complete '
                                'sequence.')
        self.assertEqual(record.description, expected_description)

        # do_features=True was requested above, so features are checked too.
        self.assertEqual(len(record.features), 29)
Example #3
0
 def test_topology_embl(self):
     """Check annotations set from an EMBL ID line alone.

     Each case feeds a single ID line to the scanner and compares the
     'topology', 'molecule_type' and 'data_file_division' annotations
     on the consumer's record with the expected values.
     """
     # Deliberately low level: only the ID line is parsed here.
     # Case layout: (ID line, topology, molecule_type, division); an
     # expected value of None means ``annotations.get(key, None)`` must
     # return None.
     id_line_cases = [
         # Modern examples with sequence version
         ("ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.",
          "linear", "mRNA", "PLN"),
         ("ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.",
          "linear", "genomic DNA", "MAM"),
         # Example to match GenBank example used above:
         ("ID   U49845; SV 1; linear; genomic DNA; STD; FUN; 5028 BP.",
          "linear", "genomic DNA", "FUN"),
         # Old examples:
         ("ID   BSUB9999   standard; circular DNA; PRO; 4214630 BP.",
          "circular", "DNA", "PRO"),
         ("ID   SC10H5 standard; DNA; PRO; 4870 BP.",
          None, "DNA", "PRO"),
         # Patent example from 2016-06-10
         # ftp://ftp.ebi.ac.uk/pub/databases/embl/patent/
         ("ID   A01679; SV 1; linear; unassigned DNA; PAT; MUS; 12 BP.",
          "linear", "unassigned DNA", "MUS"),
         # Old patent examples
         ("ID   NRP_AX000635; PRT; NR1; 15 SQ",
          None, None, "NR1"),
         ("ID   NRP0000016E; PRT; NR2; 5 SQ",
          None, None, "NR2"),
         # KIPO patent examples
         ("ID   DI500001       STANDARD;      PRT;   111 AA.",
          None, None, None),
         ("ID   DI644510   standard; PRT;  1852 AA.",
          None, None, None),
     ]
     for id_line, want_topology, want_mol_type, want_division in id_line_cases:
         # A fresh scanner and consumer per case keeps the cases independent.
         embl_scanner = Scanner.EmblScanner()
         feature_consumer = GenBank._FeatureConsumer(
             1, GenBank.FeatureValueCleaner)
         embl_scanner._feed_first_line(feature_consumer, id_line)

         # (annotation key, expected value, failure message template),
         # checked in this order.
         checks = (
             ('topology', want_topology,
              "Wrong topology %r not %r from %r"),
             ('molecule_type', want_mol_type,
              "Wrong molecule_type %r not %r from %r"),
             ('data_file_division', want_division,
              "Wrong division %r not %r from %r"),
         )
         for key, wanted, template in checks:
             found = feature_consumer.data.annotations.get(key, None)
             self.assertEqual(
                 found, wanted, template % (found, wanted, id_line))