Example #1
0
 def test_remove_non_word_characters(self):
   # Every input below differs from 'foo' only by punctuation placed at the
   # start, middle or end, so each one is expected to clean down to 'foo'.
   embl_header = EMBLHeader()
   cleaned_form = 'foo'
   raw_inputs = ('foo', '#foo', '#fo!o', 'fo#o', 'foo##', "*!#foo")
   for raw_input in raw_inputs:
     stripped = embl_header.remove_non_word_characters(raw_input)
     self.assertEqual(stripped, cleaned_form)
Example #2
0
 def test_build_source_attributes(self):
   # The organism is expected to pass through unchanged, the taxon id to be
   # prefixed with "taxon:" under "db_xref", and the sequence name to be
   # stored under "note".
   wanted = {
     "organism": "an organism",
     "db_xref": "taxon:TAX_ID",
     "note": "sequence_name"
   }
   embl_header = EMBLHeader()
   built = embl_header.build_source_attributes(
     organism="an organism",
     taxon_id="TAX_ID",
     sequence_name="sequence_name"
   )
   self.assertEqual(built, wanted)
Example #3
0
  def test_format_long_publication(self):
    header = EMBLHeader()

    header.authors="Jagger M., Richards K., Watts C., Wood R., Jones B., Stewart I., Wyman B., Taylor M."
    header.classification="UNC"
    header.genome_type="circular"
    header.organism="organism"
    header.project="PRJ1234"
    header.publication="The Rolling Stones, 12 X 5, The Rolling Stones No. 2, Out of Our Heads, Aftermath, Between the Buttons, Their Satanic Majesties Request, Beggars Banquet"
    header.sequence_identifier="contig123"
    header.sequence_length=1234
    header.sequence_name="chromX"
    header.taxon_id=5678
    header.title="Let It Bleed, Sticky Fingers, Exile on Main St., Goats Head Soup, It's Only Rock 'n Roll, Black and Blue, Some Girls, Emotional Rescue, Tattoo You, Undercover"
    source_attributes = {"organism": header.organism, "db_xref": "taxon:5678", "note": "chromX"}
    header.source_feature = EMBLFeature('source', 1, 1234, '+', source_attributes)

    expected_header = """\
ID   XXX; XXX; circular; genomic DNA; STD; UNC; 1234 BP.
XX
AC   XXX;
XX
AC * _contig123
XX
PR   Project:PRJ1234;
XX
DE   XXX;
XX
RN   [1]
RA   Jagger M., Richards K., Watts C., Wood R., Jones B., Stewart I., Wyman B.,
RA   Taylor M.;
RT   "Let It Bleed, Sticky Fingers, Exile on Main St., Goats Head Soup, It's
RT   Only Rock 'n Roll, Black and Blue, Some Girls, Emotional Rescue, Tattoo
RT   You, Undercover";
RL   The Rolling Stones, 12 X 5, The Rolling Stones No. 2, Out of Our Heads,
RL   Aftermath, Between the Buttons, Their Satanic Majesties Request, Beggars
RL   Banquet.
XX
FH   Key             Location/Qualifiers
FH
FT   source          1..1234
FT                   /organism="organism"
FT                   /mol_type="genomic DNA"
FT                   /db_xref="taxon:5678"
FT                   /note="chromX"
"""

    calculated_header = header.format()
    for calculated_line,expected_line in zip(calculated_header.split('\n'), expected_header.split('\n')):
      self.assertEqual(calculated_line, expected_line)
    self.assertEqual(len(calculated_header), len(expected_header))
Example #4
0
  def test_format_long_organism_name(self):
    header = EMBLHeader()

    header.authors="John Doe"
    header.classification="UNC"
    header.genome_type="circular"
    header.organism="reeeeeeeeeeeeeeaaaaaaaaaaaaaallllllllllyyyyyyyyyyyyyyyyyyyy_long_name"
    header.project="PRJ1234"
    header.publication="Unpublished"
    header.sequence_identifier="contig123"
    header.sequence_length=1234
    header.sequence_name="chromX"
    header.taxon_id=5678
    header.title="My title"
    source_attributes = {"organism": header.organism, "db_xref": "taxon:5678", "note": "chromX"}
    header.source_feature = EMBLFeature('source', 1, 1234, '+', source_attributes)

    expected_header = """\
ID   XXX; XXX; circular; genomic DNA; STD; UNC; 1234 BP.
XX
AC   XXX;
XX
AC * _contig123
XX
PR   Project:PRJ1234;
XX
DE   XXX;
XX
RN   [1]
RA   John Doe;
RT   "My title";
RL   Unpublished.
XX
FH   Key             Location/Qualifiers
FH
FT   source          1..1234
FT                   /organism="reeeeeeeeeeeeeeaaaaaaaaaaaaaallllllllllyyyyyyyy
FT                   yyyyyyyyyyyy_long_name"
FT                   /mol_type="genomic DNA"
FT                   /db_xref="taxon:5678"
FT                   /note="chromX"
"""

    calculated_header = header.format()
    for calculated_line,expected_line in zip(calculated_header.split('\n'), expected_header.split('\n')):
      self.assertEqual(calculated_line, expected_line)
    self.assertEqual(len(calculated_header), len(expected_header))