def test_clustering(self):
    """
    Testing procedure for the PRD method.

    Verifies that the FASTA fixture (100 records) and the Newick tree file
    exist, requests a "prd" subset division from Cluster.get_subsets and
    compares the number of subsets and their sorted sizes with the
    expected values.
    """
    fasta_path, fasta_format = "Fasta/f007.fasta", "fasta"
    tree_path, tree_format = "Newick/f007.newick", "newick"
    expected_sizes = [16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22, 22, 23, 24, 25]
    # Check the input
    self.assertTrue(os.path.isfile(fasta_path))
    records = list(SeqIO.parse(fasta_path, fasta_format))
    self.assertEqual(len(records), 100)
    self.assertTrue(os.path.isfile(tree_path))
    # Generate the subset division
    subsets = Cluster.get_subsets(
        "prd", fasta_path, fasta_format,
        tree_file=tree_path, file_format=tree_format,
        subset_size=25, overlapping=4, binary=dcm3_exe,
    )
    # Check the output: the number of subsets first, then their sizes
    # (sorted, so the comparison does not depend on dictionary order)
    self.assertEqual(len(subsets), 17)
    sizes = sorted(len(members) for members in viewvalues(subsets))
    self.assertEqual(sizes, expected_sizes)
Example #2
0
    def write(self, bioseqs_file):
        """
        Save all sequences stored at the BioSeqs object in the 'bioseqs_file'
        (in GENBANK format). A file with a detailed report of the sequences will
        be created replacing the extension of 'bioseqs_file' by ".rep". If
        'bioseqs_file' contains a relative path, the current working directory
        will be used to get the absolute path. If any file already exists, it
        will be overwritten without warning.

        An IOError is propagated as is, without removing anything. Any other
        exception raised while writing makes the method delete whichever of
        the two output files exists before re-raising it.

        Arguments :
            bioseqs_file  ( string )
                New BioSeqs sequence file.

        Raises :
            IOError
                If the path provided doesn't exist.
        """
        data_filepath = get_abspath(bioseqs_file)
        basename, _ = os.path.splitext(data_filepath)
        report_filepath = basename + ".rep"
        # One line per report entry, fields separated by four spaces
        history = "\n".join("    ".join(entry) for entry in self._report)
        report_template = "Num. sequences: {:d}\nHistory:\n{:s}"
        # Write all the information in the BioSeqs files
        try:
            SeqIO.write(viewvalues(self.data), data_filepath, "genbank")
            with open(report_filepath, "w") as report_file:
                report_text = report_template.format(len(self), history)
                report_file.write(report_text)
        except IOError:
            # I/O failures are reported to the caller untouched
            raise
        except:
            # Any other failure (interruptions included): do not leave
            # partially written files behind, then propagate the error
            for leftover in (data_filepath, report_filepath):
                if os.path.lexists(leftover):
                    os.remove(leftover)
            raise
Example #3
0
    def write(self, bioseqs_file):
        """
        Save all sequences stored at the BioSeqs object in the 'bioseqs_file'
        (in GENBANK format). A file with a detailed report of the sequences will
        be created replacing the extension of 'bioseqs_file' by ".rep". If
        'bioseqs_file' contains a relative path, the current working directory
        will be used to get the absolute path. If any file already exists, it
        will be overwritten without warning.

        IOError exceptions are re-raised directly. For any other exception,
        the sequence file and the report file are removed (when present)
        before the exception is re-raised.

        Arguments:
            bioseqs_file  (string)
                New BioSeqs sequence file.

        Raises:
            IOError
                If the path provided doesn't exist.
        """
        sequences_path = get_abspath(bioseqs_file)
        stem = os.path.splitext(sequences_path)[0]
        report_path = stem + '.rep'
        # Generate a single string with all the report content
        report_rows = []
        for fields in self._report:
            report_rows.append('    '.join(fields))
        report_body = '\n'.join(report_rows)
        # Write all the information in the BioSeqs files
        try:
            SeqIO.write(viewvalues(self.data), sequences_path, 'genbank')
            with open(report_path, 'w') as handle:
                handle.write(
                    'Num. sequences: {:d}\nHistory:\n{:s}'.format(
                        len(self), report_body))
        except IOError:
            raise
        except:
            # Remove whatever was written so far, sequence file first
            sequences_left = os.path.lexists(sequences_path)
            if sequences_left:
                os.remove(sequences_path)
            report_left = os.path.lexists(report_path)
            if report_left:
                os.remove(report_path)
            raise
 def test_clustering(self):
     """
     Testing procedure for the PRD method.

     The input FASTA file must contain 100 records and the Newick tree
     file must exist. The 'prd' division returned by Cluster.get_subsets
     is then checked against the expected number of subsets and their
     expected sizes.
     """
     sequences_file = 'Fasta/f007.fasta'
     sequences_format = 'fasta'
     tree_file = 'Newick/f007.newick'
     tree_format = 'newick'
     # Check the input
     self.assertTrue(os.path.isfile(sequences_file))
     num_records = len(list(SeqIO.parse(sequences_file, sequences_format)))
     self.assertEqual(num_records, 100)
     self.assertTrue(os.path.isfile(tree_file))
     # Generate the subset division
     division = Cluster.get_subsets('prd', sequences_file, sequences_format,
                                    tree_file=tree_file,
                                    file_format=tree_format,
                                    subset_size=25,
                                    overlapping=4,
                                    binary=dcm3_exe)
     # Check the output
     self.assertEqual(len(division), 17)
     # Sizes are compared in ascending order
     subset_sizes = sorted(map(len, viewvalues(division)))
     self.assertEqual(
         subset_sizes,
         [16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22, 22, 23, 24, 25])
 def test_clustering(self):
     """
     Testing procedure for the naive rows method.

     Confirms that the FASTA fixture exists and holds 50 records, then
     checks that Cluster.get_subsets("rows", ..., 5) yields 5 subsets of
     10 elements each.
     """
     fasta_path, fasta_format = "Fasta/f001.fasta", "fasta"
     # Check the input
     self.assertTrue(os.path.isfile(fasta_path))
     records = list(SeqIO.parse(fasta_path, fasta_format))
     self.assertEqual(len(records), 50)
     # Generate the subset division
     subsets = Cluster.get_subsets("rows", fasta_path, fasta_format, 5)
     # Check the output
     self.assertEqual(len(subsets), 5)
     for members in viewvalues(subsets):
         self.assertEqual(len(members), 10)
 def test_clustering(self):
     """
     Testing procedure for the naive rows method.

     The input file must exist and contain 50 records. The 'rows'
     division requested with 5 as last argument must return 5 subsets,
     each one with 10 elements.
     """
     sequences_file = 'Fasta/f001.fasta'
     sequences_format = 'fasta'
     # Check the input
     self.assertTrue(os.path.isfile(sequences_file))
     num_records = len(list(SeqIO.parse(sequences_file, sequences_format)))
     self.assertEqual(num_records, 50)
     # Generate the subset division
     division = Cluster.get_subsets('rows', sequences_file,
                                    sequences_format, 5)
     # Check the output
     self.assertEqual(len(division), 5)
     for subset in viewvalues(division):
         subset_size = len(subset)
         self.assertEqual(subset_size, 10)