Exemple #1
0
 def test_validate_alignment(self):
     """validate_alignment should handle objects and strings, and reject other types."""
     fasta_text = "> target\n--AAAA\n> template\nGGGG--\n"
     parsed = read_alignment(fasta_text)
     expected_from_file = read_alignment(MINI_ALIGNMENT_FILE)
     # an alignment object and its FASTA text must both yield the same alignment
     for candidate in (parsed, fasta_text):
         self.assertEqual(str(validate_alignment(candidate)), str(parsed))
     self.assertEqual(
         str(validate_alignment(MINI_ALIGNMENT_FILE)),
         str(expected_from_file))
     # an integer is neither an alignment object nor a string
     with self.assertRaises(ParameterError):
         validate_alignment(112345)
Exemple #2
0
def validate_alignment(ali):
    """Returns an alignment object for an alignment object or a string.

    RNAAlignment instances are returned as they are, strings are passed
    to read_alignment(). Any other type raises ParameterError.
    """
    # reject unsupported types up front
    if not isinstance(ali, (RNAAlignment, str)):
        raise ParameterError("Bad parameter: '%s' must be a pairwise Alignment (object or fasta string)."%str(ali))
    if isinstance(ali, RNAAlignment):
        return ali
    return read_alignment(ali)
 def test_short_with_insert(self):
     """The WITH_INSERT recipe should hold one fragment and nothing else."""
     alignment = read_alignment(WITH_INSERT)
     recipe = RecipeMaker(alignment).recipe
     expected_sizes = (
         ('difficult', 0),
         ('add_fragment', 1),
         ('add_fragment_5p', 0),
         ('add_fragment_3p', 0),
     )
     # attributes are looked up one by one, in the listed order
     for attribute, size in expected_sizes:
         self.assertEqual(len(getattr(recipe, attribute)), size)
 def test_overhang(self):
     """Gaps at the start and end should be used only when they are in the template."""
     alignment = read_alignment(OVERHANG_ALIGN)
     recipe = RecipeMaker(alignment).recipe
     # one 5' fragment of length 3
     five_prime = recipe.add_fragment_5p
     self.assertEqual(len(five_prime), 1)
     self.assertEqual(len(five_prime[0]), 3)
     # one 3' fragment of length 4
     three_prime = recipe.add_fragment_3p
     self.assertEqual(len(three_prime), 1)
     self.assertEqual(len(three_prime[0]), 4)
 def test_gaps_in_target(self):
     """Gaps located in the target should be modeled by Moderna as well."""
     alignment = read_alignment(ALIGN_TARGET_GAP)
     model = RnaModel(self.t, alignment)
     model.apply_alignment()
     model.insert_all_fragments()
     # underscores are removed before comparing
     modeled = model.get_sequence().seq_with_modifications.replace('_', '')
     self.assertEqual(modeled, '..CUGACCU#P')
    def test_model_with_5p3p_ends(self):
        a = read_alignment('''> target
CAUGCGGAYYYALCUCAGGUA
> mini_template
---GCGGAUUUALCUCAG---
''')
        m = RnaModel(self.t, a)
        m.create_model()
        self.assertEqual(m.get_sequence(), Sequence('CAUGCGGAYYYALCUCAGGUA'))
Exemple #7
0
    def test_build_model_1exd_B(self):
        """Tests building model for 2tra_A on template 1exd_B.pdb"""
        ali_string = """> 2tra_A
UC-C--G--U---G--A-------U----A---G---U---U-P--AAD---------GGD---------CA-GA-A--U-------G----G---G-C----G-C--------P----------U-----G--------U-----------C----------K------------C-------------G---U--G--C-CAG--------------------------------------------------A---U-?-G--G---G-G--T   ---P-C-A-----AU-----------U-----C---C--C---C---G--------U--C---G--C--G-GAG-C--
> 1ehd_B.pdb 
-G-G--G--G---U--A-------U----C---G---C---C-A--AGC---------GGU----------A-AG-G--C-------A----C---C-G----G-A--------U----------U-----C--------U-----------G----------A------------U-------------U---C--C--G-G-A--------------------------------------------------G--GU-C-G--A---G-G--U   ---U-C-G-----AA-----------U-----C---C--U---C---G--------U--A---C--C--C-CAG-CCA"""
        ali = read_alignment(ali_string)
        t = load_template(RNA_1EXD, 'B')
        m = create_model(t, ali, 'B')
Exemple #8
0
    def test_build_model_2bte_B(self):
        """ Tests building model for 2j00_W on template 2bte_B.pdb"""
        ali_string = """> 2j00_W.pdb CP000026.1/4316393-4316321
GC-C--C--G---G--A-------U----A---G---C---U-C--AGU--------CGGU----------A-GA-G--C-------A----G---G-G----G-A--------U----------U-----G--------A-----------A----------A------------A-------------U---C--C--C-CGU--------------------------------------------------G--UC-C-U--U---G-G--U---U-C-G-----AU-----------U-----C---C--G---A---G--------U--C---C--G--G-GCA-CCA
> 2bte_B.pdb 
GC-C--G--G---G--G-------U----G---G---C---G-G--A-AU-------GGGU----------A-GACG--C-------G----C---A-U----G-A--------C_---------A-----U--------C-----------A----------U------------G-------------U---G--C--G-CAA--------------------------------------------------G-CGU-G-C--G---G-G--U---U-C-A-----AG-----------U-----C---C--C---G---C--------C--C---C--C--G-GCA-CCA"""
        ali = read_alignment(ali_string)
        t = load_template(RNA_2BTE, 'B')
        m = create_model(t, ali, 'B')
    def test_5p_extension(self):
        a = read_alignment("""> target
AAAAAAAAAAGCGGAUUUALCUCAG
> template
----------GCGGAUUUALCUCAG
        """)
        m = RnaModel(None, a, data_type='file', data=MINI_TEMPLATE)
        m.add_missing_5p()
        self.assertEqual(m.get_sequence(), a.target_seq)
Exemple #10
0
    def test_complicated_trna_alignment(self):
        ali = read_alignment("""> 1j2b_D.pdb
GGGCCCGUGGUCUAGUU_--------G-ACGCC-GCCC-UUACGAGGCGGAG-----------G-UC-CGGGGUUC-A--AG--U-C-CC--C-G-CG-GGCCCA-CCA
> 2du6_D.pdb
--GCCAGGGUGGCAGA---GGGGCUUU-GCGGC-GGAC-UCUAGAUCCGCUUU----------A--C-CCCGGUUC-G--AA--U-C-CG--G-G-CC-CUG-GC----
""")
        expected = """
GGGCCCGUGGUCUAGUU_------GACGCCGCCCUUACGAGGCGGAG-GUCCGGGGUUCAAGUCCCCGCGGGCCCACCA
--GCCAGGGUGGCAGAG-GGGCUUUGCGGCGGACUCUAGAUCCGCUUUA-CCCCGGUUCGAAUCCGGGCCCUG-GC---
"""
        self.assertEqual(str(ali), expected)
 def test_create_model_with_gaps(self):
     """The model should be created automatically for an alignment with gaps."""
     alignment = read_alignment(ALIGN_1B23_1QF6)
     template = Template(RNA_1B23, 'file', 'R')
     model = RnaModel(template, alignment)
     model.apply_alignment()
     model.insert_all_fragments()
     # underscores are removed before comparing
     modeled = model.get_sequence().seq_with_modifications.replace('_', '')
     expected = 'GCCGAUAUAGCUCAGDDGGDAGAGCAGCGCAUUCGUEAUGCGAAG7UCGUAGGTPCGACUCCUAUUAUCGGCACCA'
     self.assertEqual(modeled, expected)
    def test_fix_example(self):
        """Real-world example from Irina"""
        ali = read_alignment(""">1EIY:C|PDBID|CHAIN|SEQUENCE
GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA
>1EHZ:A|PDBID|CHAIN|SEQUENCE
GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA""")
        am = AlignmentMatcher(ali)
        seq = Sequence('GCGGAUUUALCUCAGDDGGGAGAGCRCCAGABU#AAYAP?UGGAG7UC?UGUGTPCG"UCCACAGAAUUCGCACCA')
        self.assertFalse(am.is_template_identical(seq))
        am.fix_template_seq(seq)
        self.assertTrue(am.is_template_identical(seq))
    def test_model_with_hydro_template(self):
        """If the template contains hydrogens, modifications should be added."""
        t = Template(RNA_HYDRO, 'file', 'B')
        a = read_alignment("""> 3tra_A.pdb Z73314.1/2358-2429
UPA
> 1qru_B.pdb X55374.1/1-72
CAA
""")
        m = RnaModel(t, a)
        m.create_model()
        self.assertEqual(m.get_sequence(), Sequence('UPA'))
    def test_model_with_alignment_adjustment(self):
        """Introduces small corrections on alignment."""
        a = read_alignment("""> target
ACUGUGAYUA[UACCU#P-G
> template with small errors.
GCG7A----U.UAGCUCA_G
        """)
        t = Template(MINI_TEMPLATE, 'file')
        match_template_with_alignment(t, a)
        m = RnaModel(t, a)
        m.create_model()
        self.assertEqual(m.get_sequence(), Sequence("ACUGUGAYUA[UACCU#PG"))
    def test_number_gap(self):
        """Builds model with numbering gap in the template."""
        a = read_alignment("""> target
CCGACCUUCGGCCACCUGACAGUCCUGUGCGGGAAACCGCACAGGACUGUCAACCAGGUAAUAUAACCACCGGGAAACGGUGGUUAUAUUACCUGGUACGCCUUGACGUGGGGGAAACCCCACGUCAAGGCGUGGUGGCCGAAGGUCGG
> template
CCGACCUUCGGCCACCUGACAGUCCUGUGCGG----CCGCACAGGACUGUCAACCAGGUAAUAUAACCACCGG----CGGUGGUUAUAUUACCUGGUACGCCUUGACGUGGGG----CCCCACGUCAAGGCGUGGUGGCCGAAGGUCGG
""")
        t = Template(JMB_TEMPLATE, 'file')
        clean_structure(t)
        m = RnaModel(t, a)
        m.create_model()
        self.assertEqual(m.get_sequence().seq_without_breaks, a.target_seq)
Exemple #16
0
    def test_build_model_1efw_D(self):
        """ Tests building model for 1ehz_A on template 1efw_D. Should work even when 50 fragment candidates are checked"""
        ali_string = """> 1ehz_A.pdb M14856.1/1-73
GC-G--G--A---U--U-------U----A---L---C---U-C--AGD--------DGGG----------A-GA-G--C-------R----C---C-A----G-A--------B----------U-----#--------A-----------A----------Y------------A-------------P---?--U--G-GAG--------------------------------------------------7--UC-?-U--G---U-G--T   ---P-C-G-----"U-----------C-----C---A--C---A---G--------A--A---U--U--C-GCA-CCA
> 1efw_D.pdb
GG-A--G--C---G--G-------4----A---G---U---U-C--AGD--------CGGD---------DA-GA-A--U-------A----C---C-U----G-C--------C----------U-----Q--------U-----------C----------/------------C-------------G---C--A--G-GGG--------------------------------------------------7--UC-G-C--G---G-G--018U---P-C-G-----AG-----------U-----C---C--C---G---P--------C--C---G--U--U-CC-----"""
        ali = read_alignment(ali_string)
        t = load_template(RNA_1EFW, 'D')
        m = create_model(t, ali, 'D')
        if os.access(OUTPUT, os.F_OK): os.remove(OUTPUT)
        write_model(m, OUTPUT)
        self.assertTrue(os.access(OUTPUT, os.F_OK))
    def test_doublegap_model(self):
        """Should create a model filling two gaps"""
        a = read_alignment('''> target
GGGAUAGUUCCAGABU#A
> template
GGGA-AG--CCAGABU#A
''')
        t = Template(DOUBLEGAP, 'file', 'A')
        m = RnaModel(t, a)
        m.apply_alignment()
        m.insert_all_fragments()
        m.fix_backbone()
        self.assertEqual(m.get_sequence(), Sequence('GGGAUAGUUCCAGABU#A'))
    def test_fix_alignment_with_gap(self):
        """Real-world exaple from nettab - 1qf6 modeling on 1c0a"""
        ali = read_alignment(""">1QF6
DDGGD-AGAGAAA
>1C0A
UCGGUUAGAA---""")
        true_target_seq = ali.aligned_sequences[0]
        am = AlignmentMatcher(ali)
        seq = Sequence('DCGGDDAGAA')
        self.assertFalse(am.is_template_identical(seq))
        am.fix_template_seq(seq)
        self.assertTrue(am.is_template_identical(seq))
        self.assertEqual(ali.aligned_sequences[0], true_target_seq)
    def test_oppositegap_model(self):
        """Should create a model with close gaps in the other respective sequence"""
        a = read_alignment('''> target
GGGAGAGCRUUAG-BU#A
> template
GGGAGAGCR--AGABU#A
''')
        t = Template(OPPOSITEGAP, 'file', 'A')
        m = RnaModel(t, a)
        m.apply_alignment()
        m.insert_all_fragments()
        self.assertEqual(
            m.get_sequence().seq_with_modifications.replace('_', ''),
            'GGGAGAGCRUUAGBU#A')
    def test_long_5p_extension(self):
        a = read_alignment("""> target
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGCGGAUUUALCUCAG
> template
----------------------------------------GCGGAUUUALCUCAG
        """)
        # first number == 1 should not work
        m = RnaModel(None, a, data_type='file', data=MINI_TEMPLATE)
        self.assertRaises(RenumeratorError, m.add_missing_5p)
        # first number == 100 should work
        m = RnaModel(None, a, data_type='file', data=MINI_TEMPLATE)
        renumber_chain(m, 100)
        m.add_missing_5p()
        self.assertEqual(m.get_sequence(), a.target_seq)
    def test_model_with_close_gaps(self):
        t = Template('test_data/rna_structures/2du3_excerpt.ent', 'file', 'D')
        a = read_alignment('''> 1qru_B.pdb X55374.1/1-72
GCA-UUCCG
> 2du3_D.pdb
CUUUA-CCC
''')
        m = RnaModel()
        copy_some_residues([t['944'], t['946'], t['947']], m)
        lc = find_fragment(m, '946', '947', Sequence('U'))
        insert_fragment(m, lc[0])
        lc = find_fragment(m, '944', '946', Sequence(''))
        insert_fragment(m, lc[0])
        return  #TODO: remove
        # generate model
        m = RnaModel(t, a)
        m.create_model()
Exemple #22
0
def load_alignment(file_path):
    """*load_alignment(file_path)*

Loads a sequence alignment from a FASTA file.
Produces an Alignment object that can be saved in a variable.

ModeRNA expects the alignment file to contain exactly two sequences.
The first one is the target for which the model is to be built,
the second one is the structural template.

Standard RNA bases should be written in upper case (ACGU).
Standard DNA bases should be written in lower case (acgt).
For modified bases, see the 'concepts' section of the manual.

:Arguments:
    * path+filename of a FASTA file
    """
    # the file name is validated first, then handed to the parser
    checked_path = validate_filename(file_path)
    alignment = read_alignment(checked_path)
    return alignment
 def test_close_gaps(self):
     """Gaps that lie close to each other should work, too."""
     alignment = read_alignment(CLOSEGAPS)
     recipe = RecipeMaker(alignment).recipe
     # no difficult entries, four fragments to add
     for attribute, size in (('difficult', 0), ('add_fragment', 4)):
         self.assertEqual(len(getattr(recipe, attribute)), size)
 def setUp(self):
     """Creates alignment, template, model and base recognizer for the tests."""
     self.a = alignment = read_alignment(MINI_ALIGNMENT)
     self.t = template = Template(MINI_TEMPLATE, seq=Sequence("GCGGAUUUALCUCAG"))
     self.m = RnaModel(template, alignment)
     # template sequence as it is right after set-up
     self.seq_before = template.get_sequence()
     self.br = BaseRecognizer()
 def test_gap_optimization_example_2(self):
     """Model creation for the gap2 example should run without raising."""
     template = Template('test_data/gaps/mini_1h4s_T_gap2.pdb', 'file', 'T')
     alignment = read_alignment('test_data/gaps/ali_gap2.fasta')
     # no assertion: the test passes when create_model() raises no exception
     model = RnaModel(template, alignment)
     model.create_model()
 def test_insert_indel_quality_1(self):
     """Insert a fragment without strand break."""
     template = Template('test_data/gaps/mini_1h4s_T_gap2.pdb', 'file', 'T')
     alignment = read_alignment('test_data/gaps/ali_gap2.fasta')
     model = RnaModel(template, alignment)
     # the sequence must not contain any '_'
     modeled = model.get_sequence().seq_with_modifications
     self.assertEqual(modeled.find('_'), -1)
 def test_no_overhan(self):
     """The NO_OVERHANG_ALIGN recipe should contain no 5' or 3' fragments."""
     alignment = read_alignment(NO_OVERHANG_ALIGN)
     recipe = RecipeMaker(alignment).recipe
     for attribute in ('add_fragment_5p', 'add_fragment_3p'):
         self.assertEqual(len(getattr(recipe, attribute)), 0)
 def test_boxes(self):
     """find_in_box() should succeed for every alignment element of each BOX_SAMPLES entry."""
     for first_seq, second_seq, expected in BOX_SAMPLES:
         fasta_text = "> 1\n%s\n> 2\n%s\n" % (first_seq, second_seq)
         alignment = read_alignment(fasta_text)
         recipe = RecipeMaker(alignment).recipe
         # alignment elements are paired with the expected entries in order
         for element, box in zip(alignment, expected):
             found = self.find_in_box(recipe, element, box)
             self.assertTrue(found)
 def setUp(self):
     """Reads the MINI_ALIGNMENT alignment and builds its recipe."""
     alignment = read_alignment(MINI_ALIGNMENT)
     self.a = alignment
     self.recipe = RecipeMaker(alignment).recipe
 def test_create_model_with_unknown(self):
     """A model should be built for an alignment with unknown bases in the target."""
     alignment = read_alignment(MINI_ALIGNMENT_WITH_UNK)
     model = RnaModel(self.t, alignment)
     model.create_model()
     expected = Sequence('..CUGUQQUACCU#P')
     self.assertEqual(model.get_sequence(), expected)