Ejemplo n.º 1
0
    def setUp(self):
        """Clearcut general setUp method for all tests.

        Defines two small groups of labelled sequences, combines each group's
        labels and sequences into a flat list of lines via ``flatten``, and
        writes those lines to ``seq1.txt`` and ``seq2.txt`` inside a freshly
        created temporary directory (``self.temp_dir``).
        """
        self.seqs1 = ["ACUGCUAGCUAGUAGCGUACGUA", "GCUACGUAGCUAC", "GCGGCUAUUAGAUCGUA"]

        self.labels1 = [">1", ">2", ">3"]
        self.lines1 = flatten(zip(self.labels1, self.seqs1))

        self.seqs2 = ["UAGGCUCUGAUAUAAUAGCUCUC", "UAUCGCUUCGACGAUUCUCUGAUAGAGA", "UGACUACGCAU"]
        self.labels2 = [">a", ">b", ">c"]
        self.lines2 = flatten(zip(self.labels2, self.seqs2))

        self.temp_dir = tempfile.mkdtemp()
        # NOTE: creation of a space-containing directory
        # ('/tmp/test for clearcut/') is currently disabled:
        # self.temp_dir_spaces = '/tmp/test for clearcut/'
        # try:
        #    mkdir(self.temp_dir_spaces)
        # except OSError:
        #    pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails, so no descriptor is leaked
            with open(path.join(self.temp_dir, "seq1.txt"), "w") as f:
                f.write("\n".join(self.lines1))
            with open(path.join(self.temp_dir, "seq2.txt"), "w") as g:
                g.write("\n".join(self.lines2))
        except OSError:
            # best effort, as before: a failure to write the fixtures is
            # tolerated here and surfaces in the tests that need the files
            pass
Ejemplo n.º 2
0
    def setUp(self):
        """Clearcut general setUp method for all tests.

        Defines two small groups of labelled sequences, combines each group's
        labels and sequences into a flat list of lines via ``flatten``, and
        writes those lines to ``seq1.txt`` and ``seq2.txt`` inside a freshly
        created temporary directory (``self.temp_dir``).
        """
        self.seqs1 = ['ACUGCUAGCUAGUAGCGUACGUA','GCUACGUAGCUAC',
            'GCGGCUAUUAGAUCGUA']

        self.labels1 = ['>1','>2','>3']
        self.lines1 = flatten(zip(self.labels1,self.seqs1))

        self.seqs2=['UAGGCUCUGAUAUAAUAGCUCUC','UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU']
        self.labels2=['>a','>b','>c']
        self.lines2 = flatten(zip(self.labels2,self.seqs2))

        self.temp_dir = tempfile.mkdtemp()
        # NOTE: creation of a space-containing directory
        # ('/tmp/test for clearcut/') is currently disabled:
        #self.temp_dir_spaces = '/tmp/test for clearcut/'
        #try:
        #    mkdir(self.temp_dir_spaces)
        #except OSError:
        #    pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails, so no descriptor is leaked
            with open(path.join(self.temp_dir, 'seq1.txt'),'w') as f:
                f.write('\n'.join(self.lines1))
            with open(path.join(self.temp_dir, 'seq2.txt'),'w') as g:
                g.write('\n'.join(self.lines2))
        except OSError:
            # best effort, as before: a failure to write the fixtures is
            # tolerated here and surfaces in the tests that need the files
            pass
Ejemplo n.º 3
0
    def setUp(self):
        """Clearcut general setUp method for all tests.

        Defines two small groups of labelled sequences, combines each group's
        labels and sequences into a flat list of lines via ``flatten``, and
        writes those lines to ``seq1.txt`` and ``seq2.txt`` inside a freshly
        created temporary directory (``self.temp_dir``).
        """
        self.seqs1 = [
            'ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC', 'GCGGCUAUUAGAUCGUA'
        ]

        self.labels1 = ['>1', '>2', '>3']
        self.lines1 = flatten(zip(self.labels1, self.seqs1))

        self.seqs2 = [
            'UAGGCUCUGAUAUAAUAGCUCUC', 'UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU'
        ]
        self.labels2 = ['>a', '>b', '>c']
        self.lines2 = flatten(zip(self.labels2, self.seqs2))

        self.temp_dir = tempfile.mkdtemp()
        # NOTE: creation of a space-containing directory
        # ('/tmp/test for clearcut/') is currently disabled:
        #self.temp_dir_spaces = '/tmp/test for clearcut/'
        #try:
        #    mkdir(self.temp_dir_spaces)
        #except OSError:
        #    pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails, so no descriptor is leaked
            with open(path.join(self.temp_dir, 'seq1.txt'), 'w') as f:
                f.write('\n'.join(self.lines1))
            with open(path.join(self.temp_dir, 'seq2.txt'), 'w') as g:
                g.write('\n'.join(self.lines2))
        except OSError:
            # best effort, as before: a failure to write the fixtures is
            # tolerated here and surfaces in the tests that need the files
            pass
Ejemplo n.º 4
0
    def setUp(self):
        """Muscle general setUp method for all tests.

        First verifies that the ``muscle`` executable reports a supported
        version (major.minor must equal 3.6) and fails the test otherwise.
        Then defines two groups of labelled sequences, writes them to
        ``seq1.txt`` / ``seq2.txt`` in a fresh temporary directory, and makes
        sure a directory whose name contains spaces exists.
        """

        # Check if muscle version is supported for this test
        acceptable_version = (3, 6)
        command = "muscle -version"
        proc = Popen(command, shell=True, universal_newlines=True,
                     stdout=PIPE, stderr=STDOUT)
        # communicate() drains the pipe AND waits for the child, so the
        # process is reaped instead of being left behind
        stdout = proc.communicate()[0]
        try:
            # second space-separated token with its first character dropped
            # (presumably a leading "v" -- confirm against muscle's output)
            version_string = stdout.strip().split(' ')[1].strip()[1:]
            version = tuple(map(int, version_string.split('.')))
            pass_test = version[:2] == acceptable_version
        except (IndexError, ValueError):
            # IndexError: output had fewer tokens than expected (e.g. empty
            # output); ValueError: token was not a dotted integer version.
            # Either way report the raw output in the failure message.
            pass_test = False
            version_string = stdout
        self.assertTrue(pass_test,
         "Unsupported muscle version. %s is required, but running %s."
         % ('.'.join(map(str, acceptable_version)), version_string))

        self.seqs1 = [
            'ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC', 'GCGGCUAUUAGAUCGUA'
        ]

        self.labels1 = ['>1', '>2', '>3']
        self.lines1 = flatten(zip(self.labels1, self.seqs1))

        self.seqs2 = [
            'UAGGCUCUGAUAUAAUAGCUCUC', 'UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU'
        ]
        self.labels2 = ['>a', '>b', '>c']
        self.lines2 = flatten(zip(self.labels2, self.seqs2))

        self.temp_dir = tempfile.mkdtemp()
        self.temp_dir_spaces = '/tmp/test for muscle/'
        try:
            mkdir(self.temp_dir_spaces)
        except OSError:
            # directory could not be created (e.g. it already exists)
            pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails
            with open(path.join(self.temp_dir, 'seq1.txt'), 'w') as f:
                f.write('\n'.join(self.lines1))
            with open(path.join(self.temp_dir, 'seq2.txt'), 'w') as g:
                g.write('\n'.join(self.lines2))
        except OSError:
            # best effort, as before
            pass
Ejemplo n.º 5
0
    def test_build_tree_from_alignment(self):
        """Clearcut should return a tree built from the passed alignment"""
        # short input: the tree must have as many tips as there are '>'
        # characters in the input
        short_tree = build_tree_from_alignment(build_tree_seqs_short,
                                               moltype=DNA)
        expected_tips = flatten(build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = build_tree_from_alignment(build_tree_seqs_long, moltype=DNA)
        header_names = [row[1:] for row in build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()

        # repeat with best_tree = True
        long_tree = build_tree_from_alignment(build_tree_seqs_long,
                                              best_tree=True,
                                              moltype=DNA)
        header_names = [row[1:] for row in build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()

        # build_tree_from_alignment should raise DataError when constructing
        # an Alignment from unaligned sequences. Clearcut only allows aligned
        # or a distance matrix as input.
        self.assertRaises(DataError, build_tree_from_alignment,
                          build_tree_seqs_unaligned, DNA)
Ejemplo n.º 6
0
    def setUp(self):
        """Check that a supported raxmlHPC is available, then set up test data.

        Fails the test when ``raxmlHPC`` cannot be located or when the version
        it reports is not exactly 7.3.0. Afterwards defines the sequences,
        model name, alignment and file names used by the raxml tests.
        """
        acceptable_version = (7, 3, 0)
        self.assertTrue(app_path('raxmlHPC'),
         "raxmlHPC not found. This may or may not be a problem depending on "
         "which components of QIIME you plan to use.")
        # the pipe to grep requires a shell
        command = "raxmlHPC -v | grep version"
        proc = Popen(command, shell=True, universal_newlines=True,
                     stdout=PIPE, stderr=STDOUT)
        # communicate() drains the pipe AND waits for the child, so the
        # process is reaped instead of being left behind
        stdout = proc.communicate()[0]
        try:
            # the version is taken from the fifth space-separated token
            version_string = stdout.strip().split(' ')[4].strip()
            version = tuple(map(int, version_string.split('.')))
            pass_test = version == acceptable_version
        except (IndexError, ValueError):
            # IndexError: output had fewer than five tokens (e.g. grep
            # matched nothing); ValueError: token was not a dotted integer
            # version. Report the raw output in the failure message.
            pass_test = False
            version_string = stdout
        self.assertTrue(pass_test,
         "Unsupported raxmlHPC version. %s is required, but running %s."
         % ('.'.join(map(str, acceptable_version)), version_string))

        # Setup data for raxml tests
        self.seqs1 = ['ACUGCUAGCUAGUAGCGUACGUA','GCUACGUAGCUAC',
            'GCGGCUAUUAGAUCGUA']
        self.labels1 = ['>1','>2','>3']
        self.lines1 = flatten(zip(self.labels1,self.seqs1))

        self.test_model = "GTRCAT"

        self.align1 = get_align_for_phylip(StringIO(PHYLIP_FILE))

        self.test_fn1 = "/tmp/raxml_test1.txt"
        self.test_fn2 = "raxml_test1.txt"
        self.test_fn1_space = "/tmp/raxml test1.txt"
Ejemplo n.º 7
0
 def getDistinct(self, table_name, column):
     """Collect the distinct values stored in one column of a table.

     Arguments:
         - table_name: the data base table name
         - column: valid values are biotype, status

     Returns a set holding the values gathered from every result record."""
     tbl = self.getTable(table_name)
     distinct_query = sql.select([tbl.c[column]], distinct=True)
     found = set()
     string_types = str, unicode
     for row in distinct_query.execute():
         if type(row) in string_types:
             # a string
             values = [row]
         elif type(row[0]) in string_types:
             # list/tuple of strings
             values = tuple(row)
         else:
             # multi-dimensioned list/tuple
             values = flatten(row)

         found.update(values)
     return found
Ejemplo n.º 8
0
    def getDistinct(self, table_name, column):
        """Collect the distinct values stored in one column of a table.

        Arguments:
            - table_name: the data base table name
            - column: valid values are biotype, status

        Returns a set holding the values gathered from every result record."""
        tbl = self.getTable(table_name)
        distinct_query = sql.select([tbl.c[column]], distinct=True)
        found = set()
        string_types = str, unicode
        for row in distinct_query.execute():
            if type(row) in string_types:
                # a string
                values = [row]
            elif type(row[0]) in string_types:
                # list/tuple of strings
                values = tuple(row)
            else:
                # multi-dimensioned list/tuple
                values = flatten(row)

            found.update(values)
        return found
Ejemplo n.º 9
0
    def setUp(self):
        """Check that a supported raxmlHPC is available, then set up test data.

        Fails the test when ``raxmlHPC`` cannot be located or when the version
        it reports is not exactly 7.3.0. Afterwards defines the sequences,
        model name, alignment and file names used by the raxml tests.
        """
        acceptable_version = (7, 3, 0)
        self.assertTrue(app_path('raxmlHPC'),
         "raxmlHPC not found. This may or may not be a problem depending on "
         "which components of QIIME you plan to use.")
        # the pipe to grep requires a shell
        command = "raxmlHPC -v | grep version"
        proc = Popen(command, shell=True, universal_newlines=True,
                     stdout=PIPE, stderr=STDOUT)
        # communicate() drains the pipe AND waits for the child, so the
        # process is reaped instead of being left behind
        stdout = proc.communicate()[0]
        try:
            # the version is taken from the fifth space-separated token
            version_string = stdout.strip().split(' ')[4].strip()
            version = tuple(map(int, version_string.split('.')))
            pass_test = version == acceptable_version
        except (IndexError, ValueError):
            # IndexError: output had fewer than five tokens (e.g. grep
            # matched nothing); ValueError: token was not a dotted integer
            # version. Report the raw output in the failure message.
            pass_test = False
            version_string = stdout
        self.assertTrue(pass_test,
         "Unsupported raxmlHPC version. %s is required, but running %s."
         % ('.'.join(map(str, acceptable_version)), version_string))

        # Setup data for raxml tests
        self.seqs1 = [
            'ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC', 'GCGGCUAUUAGAUCGUA'
        ]
        self.labels1 = ['>1', '>2', '>3']
        self.lines1 = flatten(zip(self.labels1, self.seqs1))

        self.test_model = "GTRCAT"

        self.align1 = get_align_for_phylip(StringIO(PHYLIP_FILE))

        self.test_fn1 = "/tmp/raxml_test1.txt"
        self.test_fn2 = "raxml_test1.txt"
        self.test_fn1_space = "/tmp/raxml test1.txt"
Ejemplo n.º 10
0
    def test_build_tree_from_alignment(self):
        """Clearcut should return a tree built from the passed alignment"""
        # short input: the tree must have as many tips as there are '>'
        # characters in the input
        short_tree = build_tree_from_alignment(build_tree_seqs_short,
                                               moltype=DNA)
        expected_tips = flatten(build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = build_tree_from_alignment(build_tree_seqs_long, moltype=DNA)
        header_names = [row[1:] for row in build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()

        # repeat with best_tree = True
        long_tree = build_tree_from_alignment(build_tree_seqs_long,
                                              best_tree=True,
                                              moltype=DNA)
        header_names = [row[1:] for row in build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()

        # build_tree_from_alignment should raise DataError when constructing
        # an Alignment from unaligned sequences. Clearcut only allows aligned
        # or a distance matrix as input.
        self.assertRaises(DataError, build_tree_from_alignment,
                          build_tree_seqs_unaligned, DNA)
Ejemplo n.º 11
0
    def setUp(self):
        """Muscle general setUp method for all tests.

        First verifies that the ``muscle`` executable reports a supported
        version (major.minor must equal 3.6) and fails the test otherwise.
        Then defines two groups of labelled sequences, writes them to
        ``seq1.txt`` / ``seq2.txt`` in a fresh temporary directory, and makes
        sure a directory whose name contains spaces exists.
        """

        # Check if muscle version is supported for this test
        acceptable_version = (3,6)
        command = "muscle -version"
        proc = Popen(command,shell=True,universal_newlines=True,
                     stdout=PIPE,stderr=STDOUT)
        # communicate() drains the pipe AND waits for the child, so the
        # process is reaped instead of being left behind
        stdout = proc.communicate()[0]
        try:
            # second space-separated token with its first character dropped
            # (presumably a leading "v" -- confirm against muscle's output)
            version_string = stdout.strip().split(' ')[1].strip()[1:]
            version = tuple(map(int,version_string.split('.')))
            pass_test = version[:2] == acceptable_version
        except (IndexError, ValueError):
            # IndexError: output had fewer tokens than expected (e.g. empty
            # output); ValueError: token was not a dotted integer version.
            # Either way report the raw output in the failure message.
            pass_test = False
            version_string = stdout
        self.assertTrue(pass_test,
         "Unsupported muscle version. %s is required, but running %s."
         % ('.'.join(map(str,acceptable_version)), version_string))

        self.seqs1 = ['ACUGCUAGCUAGUAGCGUACGUA','GCUACGUAGCUAC',
            'GCGGCUAUUAGAUCGUA']

        self.labels1 = ['>1','>2','>3']
        self.lines1 = flatten(zip(self.labels1,self.seqs1))

        self.seqs2=['UAGGCUCUGAUAUAAUAGCUCUC','UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU']
        self.labels2=['>a','>b','>c']
        self.lines2 = flatten(zip(self.labels2,self.seqs2))

        self.temp_dir = tempfile.mkdtemp()
        self.temp_dir_spaces = '/tmp/test for muscle/'
        try:
            mkdir(self.temp_dir_spaces)
        except OSError:
            # directory could not be created (e.g. it already exists)
            pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails
            with open(path.join(self.temp_dir, 'seq1.txt'),'w') as f:
                f.write('\n'.join(self.lines1))
            with open(path.join(self.temp_dir, 'seq2.txt'),'w') as g:
                g.write('\n'.join(self.lines2))
        except OSError:
            # best effort, as before
            pass
Ejemplo n.º 12
0
 def test_with_remainder(self):
     """return the correct groups when there's a remainder"""
     # Materialise the range as a list: on Python 3 a range object never
     # compares equal to the sorted list built below, so the final
     # assertion could not pass. On Python 2 this is a no-op copy.
     data = list(range(21))
     grouped = util.make_even_groups(data, 5)
     # 21 items in groups of 5 -> 4 full groups, one item left over
     self.assertEqual(len(grouped), 4)
     for group in grouped:
         self.assertEqual(len(group), 5)
     full = sorted(flatten(grouped))
     # the leftover (last) item is expected to be dropped
     self.assertEqual(full, data[:-1])
Ejemplo n.º 13
0
 def test_one_group(self):
     """only one group"""
     # Materialise the range as a list: on Python 3 a range object never
     # compares equal to the sorted list built below, so the final
     # assertion could not pass. On Python 2 this is a no-op copy.
     data = list(range(20))
     grouped = util.make_even_groups(data, 20)
     self.assertEqual(len(grouped), 1)
     for group in grouped:
         self.assertEqual(len(group), 20)
     full = sorted(flatten(grouped))
     # a single group must contain every input item
     self.assertEqual(full, data)
Ejemplo n.º 14
0
    def setUp(self):
        """Mafft general setUp method for all tests.

        Defines two groups of labelled sequences together with the aligned
        form stored for each, writes the unaligned sequences to ``seq1.txt``
        and ``seq2.txt`` in a fresh temporary directory, and makes sure a
        directory whose name contains spaces exists.
        """
        self.seqs1 = [
            'ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC', 'GCGGCUAUUAGAUCGUA'
        ]

        self.labels1 = ['>1', '>2', '>3']
        self.lines1 = flatten(zip(self.labels1, self.seqs1))

        self.aligned1 = {'1': 'acugcuagcuaguagcguacgua',
                         '2': 'gcuacguagcuac----------',
                         '3': 'gcggcuauuagau------cgua',
                         }

        self.seqs2 = [
            'UAGGCUCUGAUAUAAUAGCUCUC', 'UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU'
        ]
        self.labels2 = ['>a', '>b', '>c']
        self.lines2 = flatten(zip(self.labels2, self.seqs2))

        self.aligned2 = {'a': 'UAGGCUCUGAUAUAAUAGCUCUC---------',
                         'b': 'UA----UCGCUUCGACGAUUCUCUGAUAGAGA',
                         'c': 'UG------------ACUACGCAU---------',
                         }

        self.temp_dir = tempfile.mkdtemp()
        self.temp_dir_spaces = '/tmp/test for mafft/'
        try:
            mkdir(self.temp_dir_spaces)
        except OSError:
            # directory could not be created (e.g. it already exists)
            pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails, so no descriptor is leaked
            with open(path.join(self.temp_dir, 'seq1.txt'), 'w') as f:
                f.write('\n'.join(self.lines1))
            with open(path.join(self.temp_dir, 'seq2.txt'), 'w') as g:
                g.write('\n'.join(self.lines2))
        except OSError:
            # best effort, as before
            pass
Ejemplo n.º 15
0
    def setUp(self):
        """Mafft general setUp method for all tests.

        Defines two groups of labelled sequences together with the aligned
        form stored for each, writes the unaligned sequences to ``seq1.txt``
        and ``seq2.txt`` in a fresh temporary directory, and makes sure a
        directory whose name contains spaces exists.
        """
        self.seqs1 = ['ACUGCUAGCUAGUAGCGUACGUA','GCUACGUAGCUAC',
            'GCGGCUAUUAGAUCGUA']

        self.labels1 = ['>1','>2','>3']
        self.lines1 = flatten(zip(self.labels1,self.seqs1))

        self.aligned1 = {'1': 'acugcuagcuaguagcguacgua',
                         '2': 'gcuacguagcuac----------',
                         '3': 'gcggcuauuagau------cgua',
                         }

        self.seqs2=['UAGGCUCUGAUAUAAUAGCUCUC','UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU']
        self.labels2=['>a','>b','>c']
        self.lines2 = flatten(zip(self.labels2,self.seqs2))

        self.aligned2 = {'a': 'UAGGCUCUGAUAUAAUAGCUCUC---------',
                         'b': 'UA----UCGCUUCGACGAUUCUCUGAUAGAGA',
                         'c': 'UG------------ACUACGCAU---------',
                         }

        self.temp_dir = tempfile.mkdtemp()
        self.temp_dir_spaces = '/tmp/test for mafft/'
        try:
            mkdir(self.temp_dir_spaces)
        except OSError:
            # directory could not be created (e.g. it already exists)
            pass
        try:
            # create sequence files; ``with`` closes each handle even when
            # the write itself fails, so no descriptor is leaked
            with open(path.join(self.temp_dir, 'seq1.txt'),'w') as f:
                f.write('\n'.join(self.lines1))
            with open(path.join(self.temp_dir, 'seq2.txt'),'w') as g:
                g.write('\n'.join(self.lines2))
        except OSError:
            # best effort, as before
            pass
Ejemplo n.º 16
0
    def test_build_tree_from_alignment(self):
        """Clustalw should return a tree built from the passed alignment"""
        # short input, best_tree off: tip count must equal the number of
        # '>' characters in the input
        short_tree = build_tree_from_alignment(
            self.build_tree_seqs_short, RNA, best_tree=False)
        expected_tips = flatten(self.build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = build_tree_from_alignment(
            self.build_tree_seqs_long, RNA, best_tree=False)
        header_names = [row[1:]
                        for row in self.build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()

        # short input again, this time with best_tree and a bootstrap param
        short_tree = build_tree_from_alignment(
            self.build_tree_seqs_short, RNA, best_tree=True,
            params={'-bootstrap':3})
        expected_tips = flatten(self.build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)
Ejemplo n.º 17
0
    def test_build_tree_from_alignment(self):
        """Clustalw should return a tree built from the passed alignment"""
        # short input, best_tree off: tip count must equal the number of
        # '>' characters in the input
        short_tree = build_tree_from_alignment(
            self.build_tree_seqs_short, RNA, best_tree=False)
        expected_tips = flatten(self.build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = build_tree_from_alignment(
            self.build_tree_seqs_long, RNA, best_tree=False)
        header_names = [row[1:]
                        for row in self.build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()

        # short input again, this time with best_tree and a bootstrap param
        short_tree = build_tree_from_alignment(
            self.build_tree_seqs_short, RNA, best_tree=True,
            params={'-bootstrap':3})
        expected_tips = flatten(self.build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)
Ejemplo n.º 18
0
    def setUp(self):
        """Prepare the sequences, model name, alignment and file names used
        by the raxml tests"""
        sequences = ['ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC',
                     'GCGGCUAUUAGAUCGUA']
        headers = ['>1', '>2', '>3']
        self.seqs1 = sequences
        self.labels1 = headers
        # pair each label with its sequence, then pass the pairs to flatten
        self.lines1 = flatten(zip(headers, sequences))

        self.test_model = "GTRCAT"

        phylip_handle = StringIO(PHYLIP_FILE)
        self.align1 = get_align_for_phylip(phylip_handle)

        # absolute path, bare file name, and a path containing a space
        self.test_fn1 = "/tmp/raxml_test1.txt"
        self.test_fn2 = "raxml_test1.txt"
        self.test_fn1_space = "/tmp/raxml test1.txt"
Ejemplo n.º 19
0
    def test_bootstrap_tree_from_alignment(self):
        """Clustalw should return a bootstrapped tree from the passed aln"""
        # short input: tip count must equal the number of '>' characters
        short_tree = bootstrap_tree_from_alignment(self.build_tree_seqs_short)
        expected_tips = flatten(self.build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = bootstrap_tree_from_alignment(self.build_tree_seqs_long)
        header_names = [row[1:]
                        for row in self.build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()
Ejemplo n.º 20
0
    def test_bootstrap_tree_from_alignment(self):
        """Clustalw should return a bootstrapped tree from the passed aln"""
        # short input: tip count must equal the number of '>' characters
        short_tree = bootstrap_tree_from_alignment(self.build_tree_seqs_short)
        expected_tips = flatten(self.build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = bootstrap_tree_from_alignment(self.build_tree_seqs_long)
        header_names = [row[1:]
                        for row in self.build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()
Ejemplo n.º 21
0
    def setUp(self):
        """Prepare the sequences, model name, alignment and file names used
        by the raxml tests"""
        sequences = ['ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC',
                     'GCGGCUAUUAGAUCGUA']
        headers = ['>1', '>2', '>3']
        self.seqs1 = sequences
        self.labels1 = headers
        # pair each label with its sequence, then pass the pairs to flatten
        self.lines1 = flatten(zip(headers, sequences))

        self.test_model = "GTRCAT"

        phylip_handle = StringIO(PHYLIP_FILE)
        self.align1 = get_align_for_phylip(phylip_handle)

        # absolute path, bare file name, and a path containing a space
        self.test_fn1 = "/tmp/raxml_test1.txt"
        self.test_fn2 = "raxml_test1.txt"
        self.test_fn1_space = "/tmp/raxml test1.txt"
Ejemplo n.º 22
0
    def test_build_tree_from_alignment(self):
        """Muscle should return a tree built from the passed alignment"""
        # short input: tip count must equal the number of '>' characters
        short_tree = build_tree_from_alignment(build_tree_seqs_short, DNA)
        expected_tips = flatten(build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = build_tree_from_alignment(build_tree_seqs_long, DNA)
        header_names = [row[1:] for row in build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()
Ejemplo n.º 23
0
    def test_build_tree_from_alignment(self):
        """Muscle should return a tree built from the passed alignment"""
        # short input: tip count must equal the number of '>' characters
        short_tree = build_tree_from_alignment(build_tree_seqs_short, DNA)
        expected_tips = flatten(build_tree_seqs_short).count('>')
        self.assertEqual(len(short_tree.tips()), expected_tips)

        # long input: every tip name must be one of the '>' header names
        long_tree = build_tree_from_alignment(build_tree_seqs_long, DNA)
        header_names = [row[1:] for row in build_tree_seqs_long.split('\n')
                        if row.startswith('>')]
        if any(tip.Name not in header_names for tip in long_tree.tips()):
            self.fail()
Ejemplo n.º 24
0
 def _get_symbol_from_synonym(self, db, synonym):
     """Look up the display label (gene symbol) recorded for a synonym.

     Returns None when the query yields nothing usable.
     """
     synonyms = db.getTable("external_synonym")
     xrefs = db.getTable("xref")
     # join xref to external_synonym on their shared xref_id
     joined = xrefs.join(synonyms, xrefs.c.xref_id == synonyms.c.xref_id)
     matches_synonym = synonyms.c.synonym == synonym
     label_query = sql.select(
         [xrefs.c.display_label],
         from_obj=[joined],
         whereclause=matches_synonym,
     ).distinct()
     rows = label_query.execute().fetchall()
     if not rows:
         return None
     try:
         # first value of the flattened result rows
         return flatten(rows)[0]
     except IndexError:
         return None
Ejemplo n.º 25
0
 def extract(self, seq):
     """Return the paired bases of seq as a list of (start, end) tuples.

     Note: the result is always a list, even for a single base pair.
     """
     if self.Length > 1:
         # several pairs: gather what each member extracts, then flatten
         return flatten([member.extract(seq) for member in self])

     # single pair: a side whose index is None yields None
     first = None if self.Start is None else seq[self.Start]
     second = None if self.End is None else seq[self.End]
     return [(first, second)]
Ejemplo n.º 26
0
 def extract(self, seq):
     """Return the paired bases of seq as a list of (start, end) tuples.

     Note: the result is always a list, even for a single base pair.
     """
     if self.Length > 1:
         # several pairs: gather what each member extracts, then flatten
         return flatten([member.extract(seq) for member in self])

     # single pair: a side whose index is None yields None
     first = None if self.Start is None else seq[self.Start]
     second = None if self.End is None else seq[self.End]
     return [(first, second)]
Ejemplo n.º 27
0
 def _get_symbol_from_synonym(self, db, synonym):
     """Look up the display label (gene symbol) recorded for a synonym.

     Returns None when the query yields nothing usable.
     """
     synonyms = db.getTable('external_synonym')
     xrefs = db.getTable('xref')
     # join xref to external_synonym on their shared xref_id
     joined = xrefs.join(synonyms, xrefs.c.xref_id == synonyms.c.xref_id)
     matches_synonym = synonyms.c.synonym == synonym
     label_query = sql.select(
         [xrefs.c.display_label],
         from_obj=[joined],
         whereclause=matches_synonym,
     ).distinct()
     rows = label_query.execute().fetchall()
     if not rows:
         return None
     try:
         # first value of the flattened result rows
         return flatten(rows)[0]
     except IndexError:
         return None
Ejemplo n.º 28
0
def seqids_from_otu_to_seqid(otu_to_seqid):
    """Return the set of all seq ids found among the mapping's values."""
    per_otu_seqids = otu_to_seqid.values()
    all_seqids = flatten(per_otu_seqids)
    return set(all_seqids)
Ejemplo n.º 29
0
def make_mage_output(groups, colors, coord_header, coords, pct_var, \
                     background_color,label_color,data_colors, \
                     taxa=None, custom_axes=None,name='', \
                     radius=None, alpha=.75, num_coords=10,scaled=False, \
                     coord_scale=1.05, edges=None, coords_low=None, \
                     coords_high=None, ellipsoid_prefs=None,
                     user_supplied_edges=False, ball_scale=1.0, \
                     arrow_colors={'line_color': 'white', 'head_color': 'red'}):
    """Convert groups, colors, coords and percent var into mage format.

    Returns a list of strings that is built up in this order: kinemage
    header, master declarations, data colors, background color, one section
    per group (balls, optional ellipsoids, labels), optional taxa, the axes,
    and optional edges.

    Arguments, as they are used in this function:
        - groups: mapping of group name -> ids belonging to that group
        - colors: mapping of group name -> color written into the
          ``color=`` field of the group's ball/label lists
        - coord_header: ids, zipped with the rows of ``coords`` (and of
          ``coords_low`` / ``coords_high`` when given)
        - coords: coordinate matrix; must support ``.max(0)`` / ``.min(0)``
          (presumably a 2D numpy array -- confirm against callers)
        - pct_var: percent variation per axis; truncated to ``num_coords``
          and, when ``scaled`` is true, also used as the scaling vector
        - background_color: the value 'white' selects a white background
        - label_color: appended to every axis line and axis label
        - data_colors: mapping whose values provide ``toMage()``
        - taxa: when not None, forwarded to ``make_mage_taxa``
        - custom_axes: names of axes that precede the PC axes
        - name: kinemage name (a suffix marks scaled/unscaled output)
        - radius: ball radius; ``auto_radius(coords)`` is used when None.
          Either way the value is multiplied by ``ball_scale``
        - alpha: written into the ``alpha=`` field of ball/label lists
        - num_coords: maximum number of dimensions to write; reduced when
          ``coords`` has fewer columns
        - scaled: when true, coords (and low/high bounds) are rescaled
          with ``scale_pc_data_matrix``
        - coord_scale: factor applied to axis minima/maxima and again to
          the axis label position
        - edges: when truthy, forwarded to ``make_edges_output``
        - coords_low, coords_high: optional bounds; ellipsoids are only
          produced when both are given
        - ellipsoid_prefs: forwarded to ``make_mage_ellipsoids``
        - user_supplied_edges, arrow_colors: forwarded to
          ``make_edges_output``
    """
    # NOTE(review): ``arrow_colors`` has a mutable dict as its default, so
    # the same dict object is shared by every call that relies on the
    # default. This function only forwards it; confirm the callee never
    # mutates it.
    result = []

    #Scale the coords and generate header labels
    if scaled:
        scalars = pct_var
        if custom_axes:
            # create a dummy vector (each entry equal to scalars[0]) that is
            # prepended for the custom axes, to avoid scaling custom axes
            custom_scalars = scalars[0] * np.ones(len(custom_axes))
            scalars = np.append(custom_scalars,scalars)
        coords = scale_pc_data_matrix(coords, scalars)
        if not coords_low is None:
            coords_low = scale_pc_data_matrix(coords_low, scalars)
        if not coords_high is None:
            coords_high = scale_pc_data_matrix(coords_high, scalars)
        header_suffix = '_scaled'
    else:
        header_suffix = '_unscaled'

    # ball_scale is applied to both the automatic and the supplied radius
    if radius is None:
        radius = float(auto_radius(coords))*float(ball_scale)
    else:
        radius = float(radius)*float(ball_scale)
        
    # per-column extremes, limited to the first num_coords columns
    maxes = coords.max(0)[:num_coords]
    mins = coords.min(0)[:num_coords]
    pct_var = pct_var[:num_coords]    #scale from fraction
    
    #check that we didn't get fewer dimensions than we wanted
    if len(mins) < num_coords:
        num_coords = len(mins)
    # min/max pairs of every axis, passed through flatten for @dimminmax
    min_maxes = flatten(zip(mins,maxes))
    
    # custom axes (if any) take the first slots; the remainder are PC1..PCn
    if custom_axes:
        axis_names = ['PC%s' %(i+1) for i in xrange(num_coords - len(custom_axes))]
        axis_names = custom_axes + axis_names
    else:
        axis_names = ['PC%s' %(i+1) for i in xrange(num_coords)]

    #Write the header information
    result.append('@kinemage {%s}' % (name+header_suffix))
    result.append('@dimension '+' '.join(['{%s}'%(name) for name in axis_names]))
    result.append('@dimminmax '+ ' '.join(map(str, min_maxes)))
    result.append('@master {points}')
    result.append('@master {labels}')
    if edges:
        result.append('@master {edges}')

    if not taxa is None:
        result.append('@master {taxa_points}')
        result.append('@master {taxa_labels}')

    # color definitions, in sorted key order
    # NOTE(review): this loop rebinds ``name`` (a parameter); the parameter's
    # value is not read again after this point in the function.
    for name, color in sorted(data_colors.items()):
        result.append(color.toMage())

    if background_color=='white':
        result.append('@whitebackground')
        result.append('@hsvcolor {black} 0.0 0.0 0.0')
    else:
        result.append('@hsvcolor {white} 180.0 0.0 100.0')
    
    #Write the groups, colors and coords
    coord_dict = dict(zip(coord_header, coords))
    if not coords_low is None:
        coord_low_dict = dict(zip(coord_header, coords_low))
    if not coords_high is None:
        coord_high_dict = dict(zip(coord_header, coords_high))
    for group_name in natsort(groups):
        ids = groups[group_name]
        result.append('@group {%s (n=%s)} collapsible' % (group_name, len(ids)))

        color = colors[group_name]
        coord_lines = []
        # ids without an entry in coord_dict are silently skipped
        for id_ in sorted(ids):
            if id_ in coord_dict:
                coord_lines.append('{%s} %s' % \
                    (id_, ' '.join(map(str, coord_dict[id_][:num_coords]))))

        # create list of balls, one for each sample
        result.append('@balllist color=%s radius=%s alpha=%s dimension=%s \
master={points} nobutton' % (color, radius, alpha, num_coords))
        result.append('\n'.join(coord_lines))
        # make ellipsoids if low and high coord bounds were received
        # (coord_low_dict / coord_high_dict only exist in that case)
        if (not coords_low is None) and (not coords_high is None):
            # create one trianglelist for each sample to define ellipsoids
            result += make_mage_ellipsoids(ids, coord_dict, coord_low_dict,
                                           coord_high_dict, color, ellipsoid_prefs)

        # create list of labels; the same coordinate lines are emitted again
        result.append('@labellist color=%s radius=%s alpha=%s dimension=%s \
master={labels} nobutton' % (color, radius, alpha, num_coords))
        result.append('\n'.join(coord_lines))

    if not taxa is None:
        result += make_mage_taxa(taxa, num_coords, pct_var,
                                 scaled=scaled, scalars=None, radius=radius)

    #Write the axes on the bottom of the graph
    result.append('@group {axes} collapsible')
    state = 'on'
    axis_mins = mins*coord_scale
    axis_maxes = maxes*coord_scale

    # normalise to a list so len(custom_axes) works below
    if not custom_axes:
        custom_axes = []
    # draw each axis
    for i in xrange(num_coords):
        # the first three axes are 'on'; from the fourth onwards 'off'
        if i == 3:
            state = 'off'            
        result.append('@vectorlist {%s line} dimension=%s %s' % \
            (axis_names[i], num_coords, state))
            
        # each axis is a line from the all-minima point to the point where
        # only coordinate i is replaced by its (scaled) maximum
        result.append(' '.join(map(str, axis_mins)) + ' ' + label_color)
        end = axis_mins.copy()
        end[i] = axis_maxes[i]
        result.append(' '.join(map(str, end)) + ' ' + label_color)
        end[i] *= coord_scale  #add scale factor to offset labels a little
            
        # custom axes come first, no "percent variance" shown
        if i < len(custom_axes):
            result.append('@labellist {%s} dimension=%s %s' % \
                              (axis_names[i], num_coords, state)) 
            result.append( ('{%s}' % (axis_names[i]))  + \
                               ' '.join(map(str, end)) + ' ' + label_color)
        # if all custom axes have been drawn, draw normal PC axes
        else:
            # pct_var is indexed relative to the first non-custom axis
            pct = pct_var[i-len(custom_axes)]
            result.append('@labellist {%s (%0.2g%%)} dimension=%s %s' % \
                              (axis_names[i], pct, num_coords, state))
            result.append( ('{%s (%0.2g%%)}' % (axis_names[i], pct))  + \
                               ' '.join(map(str, end)) + ' ' + label_color)

    #Write edges if requested
    if edges:
        result += make_edges_output(coord_dict, edges, num_coords, label_color,
                                    arrow_colors=arrow_colors,
                                    user_supplied_edges=user_supplied_edges)
    return result
Ejemplo n.º 30
0
def make_mage_output(groups, colors, coord_header, coords, pct_var, \
                     background_color,label_color,data_colors, \
                     taxa=None, custom_axes=None,name='', \
                     radius=None, alpha=.75, num_coords=10,scaled=False, \
                     coord_scale=1.05, edges=None, coords_low=None, \
                     coords_high=None, ellipsoid_prefs=None,
                     user_supplied_edges=False):
    """Convert groups, colors, coords and percent var into mage format.

    Builds the text of a kinemage as a list of strings: header lines,
    one group of points and labels per entry in groups, optional taxa,
    the axes, and optional edges.

    Parameters:
        groups: mapping of group name -> collection of sample ids.
        colors: mapping of group name -> value written after 'color='.
        coord_header: sample ids, zipped with the rows of coords.
        coords: coordinate matrix; must support .max(0) and .min(0)
            (presumably a 2-D numpy array, samples x axes -- confirm).
        pct_var: per-axis values printed in the axis labels; also used as
            the scalars passed to scale_pc_data_matrix when scaled is true.
        background_color: 'white' emits '@whitebackground' plus a black
            hsvcolor; any other value emits a white hsvcolor instead.
        label_color: appended to every axis line and axis label line.
        data_colors: dict whose values provide a toMage() method.
        taxa: if not None, forwarded to make_mage_taxa.
        custom_axes: optional list of axis names placed before the PC axes.
        name: kinemage name; '_scaled' or '_unscaled' is appended to it.
        radius: ball radius; auto_radius(coords) is used when None.
        alpha: value written after 'alpha=' in the point and label lists.
        num_coords: maximum number of dimensions to emit; lowered if
            coords has fewer columns.
        scaled: when true, coords (and coords_low/coords_high if given)
            are passed through scale_pc_data_matrix.
        coord_scale: factor applied to the per-axis mins and maxes to get
            the axis extents, and applied once more to place axis labels.
        edges: if truthy, forwarded to make_edges_output.
        coords_low, coords_high: when both are not None, ellipsoids are
            added for each group via make_mage_ellipsoids.
        ellipsoid_prefs: forwarded to make_mage_ellipsoids.
        user_supplied_edges: forwarded to make_edges_output.

    Returns:
        list of strings. Note that the point/label coordinate entries are
        single strings containing embedded newlines.
    """
    result = []

    #Scale the coords and generate header labels
    if scaled:
        scalars = pct_var
        if custom_axes:
            # prepend one scalar per custom axis, each equal to scalars[0]
            # NOTE(review): the earlier comment called this a "dummy vector
            # of ones to avoid scaling custom axes", but the ones are
            # multiplied by scalars[0] -- confirm the intended effect
            # against scale_pc_data_matrix.
            custom_scalars = scalars[0] * np.ones(len(custom_axes))
            scalars = np.append(custom_scalars, scalars)
        coords = scale_pc_data_matrix(coords, scalars)
        if not coords_low is None:
            coords_low = scale_pc_data_matrix(coords_low, scalars)
        if not coords_high is None:
            coords_high = scale_pc_data_matrix(coords_high, scalars)
        header_suffix = '_scaled'
    else:
        header_suffix = '_unscaled'

    # radius is derived from the (possibly scaled) coords when not supplied
    if radius is None:
        radius = auto_radius(coords)

    # per-axis extremes, truncated to the requested number of dimensions
    maxes = coords.max(0)[:num_coords]
    mins = coords.min(0)[:num_coords]
    # keep only the first num_coords entries (this line only truncates;
    # no rescaling of pct_var happens here)
    pct_var = pct_var[:num_coords]

    #check that we didn't get fewer dimensions than we wanted
    if len(mins) < num_coords:
        num_coords = len(mins)
    # interleave as min0, max0, min1, max1, ... for the @dimminmax line
    min_maxes = flatten(zip(mins, maxes))

    # custom axis names come first; the remaining axes are PC1, PC2, ...
    if custom_axes:
        axis_names = [
            'PC%s' % (i + 1) for i in xrange(num_coords - len(custom_axes))
        ]
        axis_names = custom_axes + axis_names
    else:
        axis_names = ['PC%s' % (i + 1) for i in xrange(num_coords)]

    #Write the header information
    result.append('@kinemage {%s}' % (name + header_suffix))
    result.append('@dimension ' +
                  ' '.join(['{%s}' % (name) for name in axis_names]))
    result.append('@dimminmax ' + ' '.join(map(str, min_maxes)))
    result.append('@master {points}')
    result.append('@master {labels}')
    if edges:
        result.append('@master {edges}')

    if not taxa is None:
        result.append('@master {taxa_points}')
        result.append('@master {taxa_labels}')

    # emit the color definitions in sorted key order; the loop variable
    # rebinds the `name` parameter, which is not read again below
    for name, color in sorted(data_colors.items()):
        result.append(color.toMage())

    if background_color == 'white':
        result.append('@whitebackground')
        result.append('@hsvcolor {black} 0.0 0.0 0.0')
    else:
        result.append('@hsvcolor {white} 180.0 0.0 100.0')

    #Write the groups, colors and coords
    coord_dict = dict(zip(coord_header, coords))
    # the low/high dicts exist only when the corresponding input was given;
    # they are read below only when both inputs are present
    if not coords_low is None:
        coord_low_dict = dict(zip(coord_header, coords_low))
    if not coords_high is None:
        coord_high_dict = dict(zip(coord_header, coords_high))
    for group_name in natsort(groups):
        ids = groups[group_name]
        result.append('@group {%s (n=%s)} collapsible' %
                      (group_name, len(ids)))

        color = colors[group_name]
        coord_lines = []
        # ids without an entry in coord_dict are skipped, although they
        # are still counted in the n= shown in the group header above
        for id_ in sorted(ids):
            if id_ in coord_dict:
                coord_lines.append('{%s} %s' % \
                    (id_, ' '.join(map(str, coord_dict[id_][:num_coords]))))

        # create list of balls, one for each sample
        result.append('@balllist color=%s radius=%s alpha=%s dimension=%s \
master={points} nobutton' % (color, radius, alpha, num_coords))
        result.append('\n'.join(coord_lines))
        # make ellipsoids if low and high coord bounds were received
        if (not coords_low is None) and (not coords_high is None):
            # create one trianglelist for each sample to define ellipsoids
            result += make_mage_ellipsoids(ids, coord_dict, coord_low_dict,
                                           coord_high_dict, color,
                                           ellipsoid_prefs)

        # create list of labels (same coordinate lines as the balls)
        result.append('@labellist color=%s radius=%s alpha=%s dimension=%s \
master={labels} nobutton' % (color, radius, alpha, num_coords))
        result.append('\n'.join(coord_lines))

    # taxa are added with scalars=None regardless of the scaled flag's
    # scalars computed above
    if not taxa is None:
        result += make_mage_taxa(taxa,
                                 num_coords,
                                 pct_var,
                                 scaled=scaled,
                                 scalars=None,
                                 radius=radius)

    #Write the axes on the bottom of the graph
    result.append('@group {axes} collapsible')
    # axes with index 0-2 are written with state 'on'; from index 3 onward
    # the state is switched to 'off' and never switched back
    state = 'on'
    axis_mins = mins * coord_scale
    axis_maxes = maxes * coord_scale

    # normalise None/empty to a list so len(custom_axes) works below
    if not custom_axes:
        custom_axes = []
    # draw each axis
    for i in xrange(num_coords):
        if i == 3:
            state = 'off'
        result.append('@vectorlist {%s line} dimension=%s %s' % \
            (axis_names[i], num_coords, state))

        # each axis is a line from the all-mins corner to the point that
        # differs from it only in coordinate i
        result.append(' '.join(map(str, axis_mins)) + ' ' + label_color)
        end = axis_mins.copy()
        end[i] = axis_maxes[i]
        result.append(' '.join(map(str, end)) + ' ' + label_color)
        end[i] *= coord_scale  #add scale factor to offset labels a little

        # custom axes come first, no "percent variance" shown
        if i < len(custom_axes):
            result.append('@labellist {%s} dimension=%s %s' % \
                              (axis_names[i], num_coords, state))
            result.append( ('{%s}' % (axis_names[i]))  + \
                               ' '.join(map(str, end)) + ' ' + label_color)
        # if all custom axes have been drawn, draw normal PC axes
        else:
            # pct_var has no entries for custom axes, so shift the index
            pct = pct_var[i - len(custom_axes)]
            result.append('@labellist {%s (%0.2g%%)} dimension=%s %s' % \
                              (axis_names[i], pct, num_coords, state))
            result.append( ('{%s (%0.2g%%)}' % (axis_names[i], pct))  + \
                               ' '.join(map(str, end)) + ' ' + label_color)

    #Write edges if requested
    if edges:
        result += make_edges_output(coord_dict,
                                    edges,
                                    num_coords,
                                    label_color,
                                    user_supplied_edges=user_supplied_edges)
    return result
Ejemplo n.º 31
0
    def setUp(self):
        """Dialign general setUp method for all tests"""
        self.seqs1 = ['LDTAPCLFSDGSPQKAAYVLWDQTILQQDITPLPSHETHSAQKGELLALICGLRAAK',
            'PDADHTWYTDGSSLLQEGQRKAGAAVTTETEVIWAKALDAGTSAQRAELIALTQALKM',
            'RPGLCQVFADATPTGWGLVMGHQRMRGTFSAPLPIHTAELLAACFARSRSGANIIGTDNSVV',
            'MLKQVEIFTDGSCLGNPGPGGYGAILRYRGREKTFSAGYTRTTNNRMELMAAIV']
        self.labels1 = ['>HTL2','>MMLV', '>HEPB', '>ECOL']
        self.lines1 = flatten(zip(self.labels1,self.seqs1))
        self.out = \
"""
                            DIALIGN 2.2.1 
                            *************

           Program code written by Burkhard Morgenstern and Said Abdeddaim 
              e-mail contact: dialign (at) gobics (dot) de 

           Published research assisted by DIALIGN 2 should cite:  

              Burkhard Morgenstern (1999).
              DIALIGN 2: improvement of the segment-to-segment
              approach to multiple sequence alignment.
              Bioinformatics 15, 211 - 218. 

           For more information, please visit the DIALIGN home page at 

              http://bibiserv.techfak.uni-bielefeld.de/dialign/ 

          ************************************************************



    program call:  dialign2-2 -fa -fn /tmp/di/seq1.fasta /tmp/di/seq1.txt  


    Aligned sequences:          length:
    ==================          =======

    1) HTL2                        57
    2) MMLV                        58
    3) HEPB                        62
    4) ECOL                        54

    Average seq. length:           57.8 


    Please note that only upper-case letters are considered to be aligned. 


    Alignment (DIALIGN format):
    ===========================

 HTL2               1   ldtapC-LFS DGS------P QKAAYVL--- ----WDQTIL QQDITPLPSH 
 MMLV               1   pdadhtw-YT DGSSLLQEGQ RKAGAAVtte teviWa---- KALDAG---T 
 HEPB               1   rpgl-CQVFA DAT------P TGWGLVM--- ----GHQRMR GTFSAPLPIH 
 ECOL               1   mlkqv-EIFT DGSCLGNPGP GGYGAIL--- ----RYRGRE KTFSAGytrT 

                        0000000588 8882222229 9999999000 0000666666 6666633334 

 HTL2              37   ethSAQKGEL LALICGLRAa k--------- --- 
 MMLV              43   ---SAQRAEL IALTQALKm- ---------- --- 
 HEPB              37   t------AEL LAA-CFARSr sganiigtdn svv 
 ECOL              43   ---TNNRMEL MAAIv----- ---------- --- 

                        0003333455 5533333300 0000000000 000 




    Sequence tree:
    ==============

 Tree constructed using UPGMAbased on DIALIGN fragment weight scores

 ((HTL2        :0.130254MMLV        :0.130254):0.067788(HEPB        :0.120520ECOL        :0.120520):0.077521);



"""
        self.temp_dir = tempfile.mkdtemp()
        try:
            #create sequence files
            f = open(path.join(self.temp_dir, 'seq1.txt'),'w')
            f.write('\n'.join(self.lines1))
            f.close()
        except OSError:
            pass
Ejemplo n.º 32
0
def expand_otu_map_seq_ids(otu_map, seq_id_map):
    """Replace every OTU's seq ids with their seq_id_map entries.

    Each seq id listed under an OTU is looked up in seq_id_map, the
    looked-up values are combined with flatten(), and the result is stored
    back under the same OTU id.  otu_map is modified in place and returned.
    """
    for otu_id, members in otu_map.items():
        looked_up = []
        for member in members:
            looked_up.append(seq_id_map[member])
        otu_map[otu_id] = flatten(looked_up)
    return otu_map
Ejemplo n.º 33
0
def expand_otu_map_seq_ids(otu_map, seq_id_map):
    """Expand the seq ids of each OTU through seq_id_map.

    For each (otu_id, seq_ids) pair, the seq_id_map value of every seq id
    is collected, the collection is passed through flatten(), and the
    outcome overwrites the OTU's entry.  The same otu_map object is
    returned after being updated in place.
    """
    for otu_id, original_ids in otu_map.items():
        replacements = [seq_id_map[original] for original in original_ids]
        expanded = flatten(replacements)
        otu_map[otu_id] = expanded
    return otu_map
Ejemplo n.º 34
0
    def setUp(self):
        """Dialign general setUp method for all tests"""
        self.seqs1 = [
            'LDTAPCLFSDGSPQKAAYVLWDQTILQQDITPLPSHETHSAQKGELLALICGLRAAK',
            'PDADHTWYTDGSSLLQEGQRKAGAAVTTETEVIWAKALDAGTSAQRAELIALTQALKM',
            'RPGLCQVFADATPTGWGLVMGHQRMRGTFSAPLPIHTAELLAACFARSRSGANIIGTDNSVV',
            'MLKQVEIFTDGSCLGNPGPGGYGAILRYRGREKTFSAGYTRTTNNRMELMAAIV'
        ]
        self.labels1 = ['>HTL2', '>MMLV', '>HEPB', '>ECOL']
        self.lines1 = flatten(zip(self.labels1, self.seqs1))
        self.out = \
"""
                            DIALIGN 2.2.1 
                            *************

           Program code written by Burkhard Morgenstern and Said Abdeddaim 
              e-mail contact: dialign (at) gobics (dot) de 

           Published research assisted by DIALIGN 2 should cite:  

              Burkhard Morgenstern (1999).
              DIALIGN 2: improvement of the segment-to-segment
              approach to multiple sequence alignment.
              Bioinformatics 15, 211 - 218. 

           For more information, please visit the DIALIGN home page at 

              http://bibiserv.techfak.uni-bielefeld.de/dialign/ 

          ************************************************************



    program call:  dialign2-2 -fa -fn /tmp/di/seq1.fasta /tmp/di/seq1.txt  


    Aligned sequences:          length:
    ==================          =======

    1) HTL2                        57
    2) MMLV                        58
    3) HEPB                        62
    4) ECOL                        54

    Average seq. length:           57.8 


    Please note that only upper-case letters are considered to be aligned. 


    Alignment (DIALIGN format):
    ===========================

 HTL2               1   ldtapC-LFS DGS------P QKAAYVL--- ----WDQTIL QQDITPLPSH 
 MMLV               1   pdadhtw-YT DGSSLLQEGQ RKAGAAVtte teviWa---- KALDAG---T 
 HEPB               1   rpgl-CQVFA DAT------P TGWGLVM--- ----GHQRMR GTFSAPLPIH 
 ECOL               1   mlkqv-EIFT DGSCLGNPGP GGYGAIL--- ----RYRGRE KTFSAGytrT 

                        0000000588 8882222229 9999999000 0000666666 6666633334 

 HTL2              37   ethSAQKGEL LALICGLRAa k--------- --- 
 MMLV              43   ---SAQRAEL IALTQALKm- ---------- --- 
 HEPB              37   t------AEL LAA-CFARSr sganiigtdn svv 
 ECOL              43   ---TNNRMEL MAAIv----- ---------- --- 

                        0003333455 5533333300 0000000000 000 




    Sequence tree:
    ==============

 Tree constructed using UPGMAbased on DIALIGN fragment weight scores

 ((HTL2        :0.130254MMLV        :0.130254):0.067788(HEPB        :0.120520ECOL        :0.120520):0.077521);



"""
        self.temp_dir = tempfile.mkdtemp()
        try:
            #create sequence files
            f = open(path.join(self.temp_dir, 'seq1.txt'), 'w')
            f.write('\n'.join(self.lines1))
            f.close()
        except OSError:
            pass
Ejemplo n.º 35
0
    def setUp(self):
        """Clustalw general setUp method for all tests"""
        self.seqs1 = ['ACUGCUAGCUAGUAGCGUACGUA','GCUACGUAGCUAC',
            'GCGGCUAUUAGAUCGUA']
        self.aln1_fasta = ALIGN1_FASTA
        self.labels1 = ['>1','>2','>3']
        self.lines1 = flatten(zip(self.labels1,self.seqs1))
        self.stdout1 = STDOUT1
        self.aln1 = ALIGN1
        self.dnd1 = DND1
        
        self.multiline1 = '\n'.join(flatten(zip(self.labels1, self.seqs1)))
       
        self.seqs2=['UAGGCUCUGAUAUAAUAGCUCUC','UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU']
        self.labels2=['>a','>b','>c']
        self.lines2 = flatten(zip(self.labels2,self.seqs2))
        self.aln2 = ALIGN2
        self.dnd2 = DND2
        
        self.twoalign = TWOALIGN
        self.alignseqs = ALIGNSEQS
        self.treeduringalignseqs = TREEDURINGALIGNSEQS
        self.treefromalignseqs = TREEFROMALIGNSEQS
        
        self.temp_dir_space = "/tmp/clustalw test"

        self.build_tree_seqs_short = """>clustal_test_seqs_0
AACCCCCACGGTGGATGCCACACGCCCCATACAAAGGGTAGGATGCTTAAGACACATCGCGTCAGGTTTGTGTCAGGCCT
AGCTTTAAATCATGCCAGTG
>clustal_test_seqs_1
GACCCACACGGTGGATGCAACAGATCCCATACACCGAGTTGGATGCTTAAGACGCATCGCGTGAGTTTTGCGTCAAGGCT
TGCTTTCAATAATGCCAGTG
>clustal_test_seqs_2
AACCCCCACGGTGGCAGCAACACGTCACATACAACGGGTTGGATTCTAAAGACAAACCGCGTCAAAGTTGTGTCAGAACT
TGCTTTGAATCATGCCAGTA
>clustal_test_seqs_3
AAACCCCACGGTAGCTGCAACACGTCCCATACCACGGGTAGGATGCTAAAGACACATCGGGTCTGTTTTGTGTCAGGGCT
TGCTTTACATCATGCAAGTG
>clustal_test_seqs_4
AACCGCCACGGTGGGTACAACACGTCCACTACATCGGCTTGGAAGGTAAAGACACGTCGCGTCAGTATTGCGTCAGGGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqs_5
AACCCCCGCGGTAGGTGCAACACGTCCCATACAACGGGTTGGAAGGTTAAGACACAACGCGTTAATTTTGTGTCAGGGCA
TGCTTTAAATCATGCCAGTT
>clustal_test_seqs_6
GACCCCCGCGGTGGCTGCAAGACGTCCCATACAACGGGTTGGATGCTTAAGACACATCGCAACAGTTTTGAGTCAGGGCT
TACTTTAGATCATGCCGGTG
>clustal_test_seqs_7
AACCCCCACGGTGGCTACAAGACGTCCCATCCAACGGGTTGGATACTTAAGGCACATCACGTCAGTTTTGTGTCAGAGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqs_8
AACCCCCACGGTGGCTGCAACACGTGGCATACAACGGGTTGGATGCTTAAGACACATCGCCTCAGTTTTGTGTCAGGGCT
TGCATTAAATCATGCCAGTG
>clustal_test_seqs_9
AAGCCCCACGGTGGCTGAAACACATCCCATACAACGGGTTGGATGCTTAAGACACATCGCATCAGTTTTATGTCAGGGGA
TGCTTTAAATCCTGACAGCG
"""
        self.build_tree_seqs_long = """>clustal_test_seqs_0
AACCCCCACGGTGGATGCCACACGCCCCATACAAAGGGTAGGATGCTTAAGACACATCGCGTCAGGTTTGTGTCAGGCCT
AGCTTTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_1
GACCCACACGGTGGATGCAACAGATCCCATACACCGAGTTGGATGCTTAAGACGCATCGCGTGAGTTTTGCGTCAAGGCT
TGCTTTCAATAATGCCAGTG
>clustal_test_seqsaaaaaaaa_2
AACCCCCACGGTGGCAGCAACACGTCACATACAACGGGTTGGATTCTAAAGACAAACCGCGTCAAAGTTGTGTCAGAACT
TGCTTTGAATCATGCCAGTA
>clustal_test_seqsaaaaaaaa_3
AAACCCCACGGTAGCTGCAACACGTCCCATACCACGGGTAGGATGCTAAAGACACATCGGGTCTGTTTTGTGTCAGGGCT
TGCTTTACATCATGCAAGTG
>clustal_test_seqsaaaaaaaa_4
AACCGCCACGGTGGGTACAACACGTCCACTACATCGGCTTGGAAGGTAAAGACACGTCGCGTCAGTATTGCGTCAGGGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_5
AACCCCCGCGGTAGGTGCAACACGTCCCATACAACGGGTTGGAAGGTTAAGACACAACGCGTTAATTTTGTGTCAGGGCA
TGCTTTAAATCATGCCAGTT
>clustal_test_seqsaaaaaaaa_6
GACCCCCGCGGTGGCTGCAAGACGTCCCATACAACGGGTTGGATGCTTAAGACACATCGCAACAGTTTTGAGTCAGGGCT
TACTTTAGATCATGCCGGTG
>clustal_test_seqsaaaaaaaa_7
AACCCCCACGGTGGCTACAAGACGTCCCATCCAACGGGTTGGATACTTAAGGCACATCACGTCAGTTTTGTGTCAGAGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_8
AACCCCCACGGTGGCTGCAACACGTGGCATACAACGGGTTGGATGCTTAAGACACATCGCCTCAGTTTTGTGTCAGGGCT
TGCATTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_9
AAGCCCCACGGTGGCTGAAACACATCCCATACAACGGGTTGGATGCTTAAGACACATCGCATCAGTTTTATGTCAGGGGA
TGCTTTAAATCCTGACAGCG
"""
        try:
            mkdir('/tmp/ct')
        except OSError: #dir already exists
            pass
        
        try:
            #create sequence files
            f = open('/tmp/ct/seq1.txt','w')
            f.write('\n'.join(self.lines1))
            f.close()
            g = open('/tmp/ct/seq2.txt','w')
            g.write('\n'.join(self.lines2))
            g.close()
            #create alignment files
            f = open('/tmp/ct/align1','w')
            f.write(self.aln1)
            f.close()
            g = open('/tmp/ct/align2','w')
            g.write(self.aln2)
            g.close()
            #create tree file
            f = open('/tmp/ct/tree1','w')
            f.write(DND1)
            f.close()
        except OSError:
            pass
Ejemplo n.º 36
0
    def Scores(self, value):
        """Store the flattened form of value as the probeset scores.

        If the instance has no _probeset_scores attribute yet, it is first
        created as None, so the attribute exists even when flatten() fails.
        """
        missing = not hasattr(self, '_probeset_scores')
        if missing:
            self._probeset_scores = None

        flattened = flatten(value)
        self._probeset_scores = flattened
Ejemplo n.º 37
0
def seqids_from_otu_to_seqid(otu_to_seqid):
    """Return the set of items obtained by flattening otu_to_seqid's values."""
    per_otu_seqids = otu_to_seqid.values()
    all_seqids = flatten(per_otu_seqids)
    return set(all_seqids)
Ejemplo n.º 38
0
    def setUp(self):
        """Clustalw general setUp method for all tests"""
        self.seqs1 = [
            'ACUGCUAGCUAGUAGCGUACGUA', 'GCUACGUAGCUAC', 'GCGGCUAUUAGAUCGUA'
        ]
        self.aln1_fasta = ALIGN1_FASTA
        self.labels1 = ['>1', '>2', '>3']
        self.lines1 = flatten(zip(self.labels1, self.seqs1))
        self.stdout1 = STDOUT1
        self.aln1 = ALIGN1
        self.dnd1 = DND1

        self.multiline1 = '\n'.join(flatten(zip(self.labels1, self.seqs1)))

        self.seqs2 = [
            'UAGGCUCUGAUAUAAUAGCUCUC', 'UAUCGCUUCGACGAUUCUCUGAUAGAGA',
            'UGACUACGCAU'
        ]
        self.labels2 = ['>a', '>b', '>c']
        self.lines2 = flatten(zip(self.labels2, self.seqs2))
        self.aln2 = ALIGN2
        self.dnd2 = DND2

        self.twoalign = TWOALIGN
        self.alignseqs = ALIGNSEQS
        self.treeduringalignseqs = TREEDURINGALIGNSEQS
        self.treefromalignseqs = TREEFROMALIGNSEQS

        self.temp_dir_space = "/tmp/clustalw test"

        self.build_tree_seqs_short = """>clustal_test_seqs_0
AACCCCCACGGTGGATGCCACACGCCCCATACAAAGGGTAGGATGCTTAAGACACATCGCGTCAGGTTTGTGTCAGGCCT
AGCTTTAAATCATGCCAGTG
>clustal_test_seqs_1
GACCCACACGGTGGATGCAACAGATCCCATACACCGAGTTGGATGCTTAAGACGCATCGCGTGAGTTTTGCGTCAAGGCT
TGCTTTCAATAATGCCAGTG
>clustal_test_seqs_2
AACCCCCACGGTGGCAGCAACACGTCACATACAACGGGTTGGATTCTAAAGACAAACCGCGTCAAAGTTGTGTCAGAACT
TGCTTTGAATCATGCCAGTA
>clustal_test_seqs_3
AAACCCCACGGTAGCTGCAACACGTCCCATACCACGGGTAGGATGCTAAAGACACATCGGGTCTGTTTTGTGTCAGGGCT
TGCTTTACATCATGCAAGTG
>clustal_test_seqs_4
AACCGCCACGGTGGGTACAACACGTCCACTACATCGGCTTGGAAGGTAAAGACACGTCGCGTCAGTATTGCGTCAGGGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqs_5
AACCCCCGCGGTAGGTGCAACACGTCCCATACAACGGGTTGGAAGGTTAAGACACAACGCGTTAATTTTGTGTCAGGGCA
TGCTTTAAATCATGCCAGTT
>clustal_test_seqs_6
GACCCCCGCGGTGGCTGCAAGACGTCCCATACAACGGGTTGGATGCTTAAGACACATCGCAACAGTTTTGAGTCAGGGCT
TACTTTAGATCATGCCGGTG
>clustal_test_seqs_7
AACCCCCACGGTGGCTACAAGACGTCCCATCCAACGGGTTGGATACTTAAGGCACATCACGTCAGTTTTGTGTCAGAGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqs_8
AACCCCCACGGTGGCTGCAACACGTGGCATACAACGGGTTGGATGCTTAAGACACATCGCCTCAGTTTTGTGTCAGGGCT
TGCATTAAATCATGCCAGTG
>clustal_test_seqs_9
AAGCCCCACGGTGGCTGAAACACATCCCATACAACGGGTTGGATGCTTAAGACACATCGCATCAGTTTTATGTCAGGGGA
TGCTTTAAATCCTGACAGCG
"""
        self.build_tree_seqs_long = """>clustal_test_seqs_0
AACCCCCACGGTGGATGCCACACGCCCCATACAAAGGGTAGGATGCTTAAGACACATCGCGTCAGGTTTGTGTCAGGCCT
AGCTTTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_1
GACCCACACGGTGGATGCAACAGATCCCATACACCGAGTTGGATGCTTAAGACGCATCGCGTGAGTTTTGCGTCAAGGCT
TGCTTTCAATAATGCCAGTG
>clustal_test_seqsaaaaaaaa_2
AACCCCCACGGTGGCAGCAACACGTCACATACAACGGGTTGGATTCTAAAGACAAACCGCGTCAAAGTTGTGTCAGAACT
TGCTTTGAATCATGCCAGTA
>clustal_test_seqsaaaaaaaa_3
AAACCCCACGGTAGCTGCAACACGTCCCATACCACGGGTAGGATGCTAAAGACACATCGGGTCTGTTTTGTGTCAGGGCT
TGCTTTACATCATGCAAGTG
>clustal_test_seqsaaaaaaaa_4
AACCGCCACGGTGGGTACAACACGTCCACTACATCGGCTTGGAAGGTAAAGACACGTCGCGTCAGTATTGCGTCAGGGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_5
AACCCCCGCGGTAGGTGCAACACGTCCCATACAACGGGTTGGAAGGTTAAGACACAACGCGTTAATTTTGTGTCAGGGCA
TGCTTTAAATCATGCCAGTT
>clustal_test_seqsaaaaaaaa_6
GACCCCCGCGGTGGCTGCAAGACGTCCCATACAACGGGTTGGATGCTTAAGACACATCGCAACAGTTTTGAGTCAGGGCT
TACTTTAGATCATGCCGGTG
>clustal_test_seqsaaaaaaaa_7
AACCCCCACGGTGGCTACAAGACGTCCCATCCAACGGGTTGGATACTTAAGGCACATCACGTCAGTTTTGTGTCAGAGCT
TGCTTTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_8
AACCCCCACGGTGGCTGCAACACGTGGCATACAACGGGTTGGATGCTTAAGACACATCGCCTCAGTTTTGTGTCAGGGCT
TGCATTAAATCATGCCAGTG
>clustal_test_seqsaaaaaaaa_9
AAGCCCCACGGTGGCTGAAACACATCCCATACAACGGGTTGGATGCTTAAGACACATCGCATCAGTTTTATGTCAGGGGA
TGCTTTAAATCCTGACAGCG
"""
        try:
            mkdir('/tmp/ct')
        except OSError:  #dir already exists
            pass

        try:
            #create sequence files
            f = open('/tmp/ct/seq1.txt', 'w')
            f.write('\n'.join(self.lines1))
            f.close()
            g = open('/tmp/ct/seq2.txt', 'w')
            g.write('\n'.join(self.lines2))
            g.close()
            #create alignment files
            f = open('/tmp/ct/align1', 'w')
            f.write(self.aln1)
            f.close()
            g = open('/tmp/ct/align2', 'w')
            g.write(self.aln2)
            g.close()
            #create tree file
            f = open('/tmp/ct/tree1', 'w')
            f.write(DND1)
            f.close()
        except OSError:
            pass