Example #1
0
    def test_metaonly(self):
        fam = Family()
        fam.name = "Test3"
        fam.accession = "TEST0003"
        fam.version = 3
        fam.clades = [5]
        fam.consensus = "ACGTTGCA"
        fam.repeat_type = "Test"
        fam.repeat_subtype = "Metadata"

        self.assertEqual(
            fam.to_embl(fakedb(), include_seq=False), """\
ID   TEST0003; SV 3; linear; DNA; STD; UNC; 8 BP.
NM   Test3
XX
AC   TEST0003;
XX
XX
KW   Test/Metadata.
XX
OS   Species 1
OC   Parent Clade; A Clade.
XX
CC
CC   RepeatMasker Annotations:
CC        Type: Test
CC        SubType: Metadata
CC        Species: Species_1
CC        SearchStages: 
CC        BufferStages: 
XX
//
""")
Example #2
0
def test_family():
    """Build and return a sample Family carrying metadata and a truncated HMMER3 model."""
    # Header of an HMMER3 profile; the body is intentionally elided ("<snip>").
    hmm_text = """\
HMMER3/f [3.1b2 | February 2015]
NAME  TEST0001#Type/SubType
LENG  100
MAXL  122
ALPH  DNA
RF    yes
MM    no
CONS  yes
CS    no
MAP   yes
DATE  Mon Aug 17 23:04:43 2015
NSEQ  2000
EFFN  18.549065
CKSUM 765031794
STATS LOCAL MSV      -10.5531  0.70202
STATS LOCAL VITERBI  -11.4974  0.70202
STATS LOCAL FORWARD   -4.5297  0.70202
HMM          A        C        G        T   
            m->m     m->i     m->d     i->m     i->i     d->m     d->d
<snip>
"""

    sample = Family()
    sample.accession = "TEST0001"
    sample.title = "A Simple Test"
    sample.version = 1
    sample.clades = [5, 3]
    sample.repeat_type = "Type"
    sample.repeat_subtype = "SubType"
    sample.classification = "root;Type;SubType"
    sample.model = hmm_text

    return sample
Example #3
0
    def test_attached_to_root(self):
        fam = Family()
        fam.name = "Test6"
        fam.accession = "TEST0006"
        fam.version = 6
        fam.clades = [1]
        fam.consensus = "ACGTTGCAGAGACTCT"
        fam.repeat_type = "Test"
        fam.repeat_subtype = "RootTaxa"

        self.assertEqual(
            fam.to_embl(fakedb(), include_seq=False), """\
ID   TEST0006; SV 6; linear; DNA; STD; UNC; 16 BP.
NM   Test6
XX
AC   TEST0006;
XX
XX
KW   Test/RootTaxa.
XX
XX
CC
CC   RepeatMasker Annotations:
CC        Type: Test
CC        SubType: RootTaxa
CC        Species: root
CC        SearchStages: 
CC        BufferStages: 
XX
//
""")
Example #4
0
    def test_simple(self):
        fam = Family()
        fam.name = "Test1"
        fam.accession = "TEST0001"
        fam.version = 1
        fam.clades = [5]
        fam.consensus = "ACGTAAAA"
        fam.repeat_type = "Type"
        fam.repeat_subtype = "SubType"

        self.assertEqual(
            fam.to_embl(fakedb()), """\
ID   TEST0001; SV 1; linear; DNA; STD; UNC; 8 BP.
NM   Test1
XX
AC   TEST0001;
XX
XX
KW   Type/SubType.
XX
OS   Species 1
OC   Parent Clade; A Clade.
XX
CC
CC   RepeatMasker Annotations:
CC        Type: Type
CC        SubType: SubType
CC        Species: Species_1
CC        SearchStages: 
CC        BufferStages: 
XX
SQ   Sequence 8 BP; 5 A; 1 C; 1 G; 1 T; 0 other;
     acgtaaaa                                                           8
//
""")
Example #5
0
    def test_search_stages(self):
        """Search stages are appended to the FASTA header as an '[S:...]' tag."""
        family = Family()
        family.name = "Test9"
        family.accession = "TEST0009"
        family.version = 9
        family.clades = [2]
        family.consensus = "ACGT"
        family.search_stages = "30,45"

        expected = ">Test9 @A_Clade [S:30,45]\nACGT\n"
        actual = family.to_fasta(fakedb())
        self.assertEqual(actual, expected)
Example #6
0
    def test_no_consensus(self):
        """to_embl() yields None for a family that has no consensus sequence."""
        fam = Family()
        fam.name = "Test9"
        fam.accession = "TEST0009"
        fam.version = 9
        fam.clades = [2]

        # assertIsNone checks identity rather than equality and reports a
        # clearer failure message than assertEqual(..., None).
        self.assertIsNone(fam.to_embl(None))
Example #7
0
    def test_missing_consensus(self):
        """to_fasta() yields None for a family that has no consensus sequence."""
        fam = Family()
        fam.name = "Test8"
        fam.accession = "TEST0008"
        fam.version = 8
        fam.clades = []

        # assertIsNone checks identity rather than equality and reports a
        # clearer failure message than assertEqual(..., None).
        self.assertIsNone(fam.to_fasta(fakedb()))
Example #8
0
    def test_clades(self):
        """Each clade is listed in the FASTA header as an '@Name' token."""
        family = Family()
        family.name = "Test4"
        family.accession = "TEST0004"
        family.version = 4
        family.clades = [2, 3]
        family.consensus = "ACGT"

        expected = ">Test4 @A_Clade @Another_Clade_3.\nACGT\n"
        actual = family.to_fasta(fakedb())
        self.assertEqual(actual, expected)
Example #9
0
    def test_complement(self):
        """Reverse-complement export tags the header '(anti)' and flips the sequence."""
        family = Family()
        family.name = "Test3"
        family.accession = "TEST0003"
        family.version = 3
        family.clades = []
        family.consensus = "CGTAWWKSAAAA"

        # NOTE(review): a textbook IUPAC reverse complement of this input is
        # "TTTTSMWWTACG" (S and W are self-complementary); the expected value
        # below swaps S and W instead. Confirm this matches the intended
        # behavior of to_fasta() before changing either side.
        expected = ">Test3 (anti)\nTTTTWMSSTACG\n"
        actual = family.to_fasta(None, do_reverse_complement=True)
        self.assertEqual(actual, expected)
Example #10
0
    def test_always_exports_uppercase(self):
        """A lowercase consensus is written out in uppercase by to_fasta()."""
        family = Family()
        family.name = "Test10"
        family.accession = "TEST0010"
        family.version = 10
        family.clades = []
        family.consensus = "acgt"

        expected = ">Test10\nACGT\n"
        actual = family.to_fasta(fakedb())
        self.assertEqual(actual, expected)
Example #11
0
    def test_simple(self):
        """Plain FASTA export, by name and by versioned accession."""
        family = Family()
        family.name = "Test1"
        family.accession = "TEST0001"
        family.version = 1
        family.clades = []
        family.consensus = "ACGTAAAA"

        # Default: the header is just the family name.
        by_name = family.to_fasta(None)
        self.assertEqual(by_name, ">Test1\nACGTAAAA\n")

        # With use_accession the header becomes "<accession>.<version>" and
        # the name moves into a "name=" field.
        by_accession = family.to_fasta(None, use_accession=True)
        self.assertEqual(by_accession, ">TEST0001.1 name=Test1\nACGTAAAA\n")
Example #12
0
    def test_buffer(self):
        """Buffer export: whole sequence with buffer=True, a sub-range with a [start, end] pair."""
        family = Family()
        family.name = "Test6"
        family.accession = "TEST0006"
        family.version = 6
        family.clades = []
        family.consensus = "AAAAGCGCGCAAAA"

        # buffer=True keeps the full sequence and appends "#buffer".
        whole = family.to_fasta(fakedb(), buffer=True)
        self.assertEqual(whole, ">Test6#buffer\nAAAAGCGCGCAAAA\n")

        # buffer=[5, 10] yields positions 5 through 10 and encodes the range
        # in the header.
        partial = family.to_fasta(fakedb(), buffer=[5, 10])
        self.assertEqual(partial, ">Test6_5_10#buffer\nGCGCGC\n")
Example #13
0
    def test_seqonly(self):
        fam = Family()
        fam.name = "Test4"
        fam.accession = "TEST0004"
        fam.version = 4
        fam.clades = [5]
        fam.consensus = "ACGTTGCA"
        fam.repeat_type = "Test"
        fam.repeat_subtype = "SequenceOnly"

        self.assertEqual(
            fam.to_embl(fakedb(), include_meta=False), """\
ID   TEST0004; SV 4; linear; DNA; STD; UNC; 8 BP.
NM   Test4
XX
AC   TEST0004;
XX
XX
SQ   Sequence 8 BP; 2 A; 2 C; 2 G; 2 T; 0 other;
     acgttgca                                                           8
//
""")
Example #14
0
    def test_multiline(self):
        fam = Family()
        fam.name = "Test2"
        fam.accession = "TEST0002"
        fam.version = 2
        fam.clades = [5]
        fam.consensus = "ACGTTGCA" * 20  # 160 bp total
        fam.repeat_type = "Test"
        fam.repeat_subtype = "Multiline"

        self.assertEqual(
            fam.to_embl(fakedb()), """\
ID   TEST0002; SV 2; linear; DNA; STD; UNC; 160 BP.
NM   Test2
XX
AC   TEST0002;
XX
XX
KW   Test/Multiline.
XX
OS   Species 1
OC   Parent Clade; A Clade.
XX
CC
CC   RepeatMasker Annotations:
CC        Type: Test
CC        SubType: Multiline
CC        Species: Species_1
CC        SearchStages: 
CC        BufferStages: 
XX
SQ   Sequence 160 BP; 40 A; 40 C; 40 G; 40 T; 0 other;
     acgttgcaac gttgcaacgt tgcaacgttg caacgttgca acgttgcaac gttgcaacgt  60
     tgcaacgttg caacgttgca acgttgcaac gttgcaacgt tgcaacgttg caacgttgca  120
     acgttgcaac gttgcaacgt tgcaacgttg caacgttgca                        160
//
""")
Example #15
0
    def test_multiline(self):
        fam = Family()
        fam.name = "Test5"
        fam.accession = "TEST0005"
        fam.version = 5
        fam.clades = []
        fam.consensus = "ACGTTGCA" * 20 # 160 bp total

        self.assertEqual(
            fam.to_fasta(fakedb()),
            """\
>Test5
ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGT
TGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA
ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA
"""
        )
Example #16
0
    def test_without_version(self):
        """With no version set, use_accession emits the bare accession in the header."""
        family = Family()
        family.accession = "Test11"
        family.clades = []
        family.consensus = "acgt"

        # No ".<version>" suffix is expected after the accession.
        expected = ">Test11\nACGT\n"
        actual = family.to_fasta(fakedb(), use_accession=True)
        self.assertEqual(actual, expected)
Example #17
0
    def test_all(self):
        fam = Family()
        fam.name = "Test7"
        fam.accession = "TEST0007"
        fam.version = 7
        fam.clades = [2, 3]
        fam.consensus = "ACGTTGCA" * 20 # 160 bp total

        self.assertEqual(
            fam.to_fasta(
                fakedb(),
                use_accession=True,
                include_class_in_name=True,
                buffer=True,
            ),
            """\
>TEST0007.7#buffer name=Test7 @A_Clade @Another_Clade_3.
ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGT
TGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA
ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA
"""
        )

        self.assertEqual(
            fam.to_fasta(
                fakedb(),
                use_accession=True,
                include_class_in_name=True,
                do_reverse_complement=True,
                buffer=[23, 39],
            ),
            """\
>TEST0007.7_23_39#buffer (anti) name=Test7 @A_Clade @Another_Clade_3.
GCAACGTTGCAACGTTG
"""
        )
Example #18
0
    def test_cds(self):
        fam = Family()
        fam.name = "Test8"
        fam.accession = "TEST0008"
        fam.version = 8
        fam.clades = [2]
        fam.consensus = "ACGTTGCAGAGACTCT"
        fam.repeat_type = "Test"
        fam.repeat_subtype = "CodingSequence"
        fam.coding_sequences = json.dumps([
            {
                "cds_start": 1,
                "cds_end": 6,
                "product": "FAKE",
                "exon_count": 1,
                "description": "Example coding sequence",
                "translation": "TL",
            },
            {
                "cds_start": 5,
                "cds_end": 16,
                "product": "FAKE2",
                "exon_count": 1,
                "description": "Another example coding sequence",
                "translation": "CRDS",
            },
        ])

        self.assertEqual(
            fam.to_embl(fakedb(), include_seq=False), """\
ID   TEST0008; SV 8; linear; DNA; STD; UNC; 16 BP.
NM   Test8
XX
AC   TEST0008;
XX
XX
KW   Test/CodingSequence.
XX
OS   A Clade
OC   Parent Clade.
XX
CC
CC   RepeatMasker Annotations:
CC        Type: Test
CC        SubType: CodingSequence
CC        Species: A_Clade
CC        SearchStages: 
CC        BufferStages: 
XX
FH   Key             Location/Qualifiers
FH
FT   CDS             1..6
FT                   /product="FAKE"
FT                   /number=1
FT                   /note="Example coding sequence"
FT                   /translation="TL"
FT   CDS             5..16
FT                   /product="FAKE2"
FT                   /number=1
FT                   /note="Another example coding sequence"
FT                   /translation="CRDS"
XX
//
""")
Example #19
0
    def test_citations(self):
        fam = Family()
        fam.name = "Test7"
        fam.accession = "TEST0007"
        fam.version = 7
        fam.clades = [2]
        fam.consensus = "ACGTTGCAGAGACTCT"
        fam.length = 16
        fam.repeat_type = "Test"
        fam.repeat_subtype = "HasCitations"
        fam.citations = json.dumps([
            {
                "order_added": 1,
                "authors": "John Doe",
                "title": "Testing Citation Export Formatting",
                "journal": "Unit Tests 7(2), 2020.",
            },
            {
                "order_added": 2,
                "authors": "Jane Doe",
                "title": "Testing Citation Export Formatting",
                "journal": "Unit Tests 7(2), 2020.",
            },
        ])

        self.assertEqual(
            fam.to_embl(fakedb(), include_seq=False), """\
ID   TEST0007; SV 7; linear; DNA; STD; UNC; 16 BP.
NM   Test7
XX
AC   TEST0007;
XX
XX
KW   Test/HasCitations.
XX
OS   A Clade
OC   Parent Clade.
XX
RN   [1] (bases 1 to 16)
RA   John Doe
RT   Testing Citation Export Formatting
RL   Unit Tests 7(2), 2020.
XX
RN   [2] (bases 1 to 16)
RA   Jane Doe
RT   Testing Citation Export Formatting
RL   Unit Tests 7(2), 2020.
XX
CC
CC   RepeatMasker Annotations:
CC        Type: Test
CC        SubType: HasCitations
CC        Species: A_Clade
CC        SearchStages: 
CC        BufferStages: 
XX
//
""")
Example #20
0
    def test_classname(self):
        """include_class_in_name appends '#Type' or '#Type/SubType' to the FASTA header name."""
        family = Family()
        family.name = "Test2"
        family.accession = "TEST0002"
        family.version = 2
        family.clades = []
        family.consensus = "TCGATTTT"
        family.repeat_type = "Type"

        # Only a type is set: the header carries "#Type".
        type_only = family.to_fasta(None, include_class_in_name=True)
        self.assertEqual(type_only, ">Test2#Type\nTCGATTTT\n")

        # Once a subtype is added it follows the type after a slash.
        family.repeat_subtype = "SubType"
        type_and_subtype = family.to_fasta(None, include_class_in_name=True)
        self.assertEqual(type_and_subtype, ">Test2#Type/SubType\nTCGATTTT\n")
Example #21
0
    def test_special_metadata(self):
        fam = Family()
        fam.name = "Test5"
        fam.accession = "TEST0005"
        fam.version = 5
        fam.clades = [5, 3]
        fam.consensus = "ACGTTGCAGAGAKWCTCT"
        fam.repeat_type = "LTR"
        fam.repeat_subtype = "BigTest"
        fam.aliases = "Repbase:MyLTR1\nOtherDB:MyLTR\n"
        fam.refineable = True

        self.assertEqual(
            fam.to_embl(fakedb()), """\
ID   TEST0005; SV 5; linear; DNA; STD; UNC; 18 BP.
NM   Test5
XX
AC   TEST0005;
XX
XX
DR   Repbase; MyLTR1.
XX
KW   Long terminal repeat of retrovirus-like element; Test5.
XX
OS   Species 1
OC   Parent Clade; A Clade.
OS   Another Clade (3.)
OC   .
XX
CC
CC   RepeatMasker Annotations:
CC        Type: LTR
CC        SubType: BigTest
CC        Species: Species_1, Another_Clade_3.
CC        SearchStages: 
CC        BufferStages: 
CC        Refineable
XX
SQ   Sequence 18 BP; 4 A; 4 C; 4 G; 4 T; 2 other;
     acgttgcaga gakwctct                                                18
//
""")