def test_clustering(self):
    """
    Check the subset division produced by the PRD method.

    The input FASTA file must hold 100 sequences and the guide tree file
    must exist. The division is requested with a subset size of 25 and an
    overlapping of 4, and is expected to yield 17 subsets with a fixed
    distribution of sizes.
    """
    fasta_path = "Fasta/f007.fasta"
    fasta_format = "fasta"
    tree_path = "Newick/f007.newick"
    tree_format = "newick"

    # Sanity checks on the fixtures before running the method
    self.assertTrue(os.path.isfile(fasta_path))
    records = list(SeqIO.parse(fasta_path, fasta_format))
    self.assertEqual(len(records), 100)
    self.assertTrue(os.path.isfile(tree_path))

    # Run the PRD subset division
    subsets = Cluster.get_subsets(
        "prd",
        fasta_path,
        fasta_format,
        tree_file=tree_path,
        file_format=tree_format,
        subset_size=25,
        overlapping=4,
        binary=dcm3_exe,
    )

    # Compare the number of subsets and their (sorted) sizes
    expected_sizes = [
        16, 17, 17, 18, 18, 19, 19, 19, 20,
        20, 20, 20, 22, 22, 23, 24, 25,
    ]
    self.assertEqual(len(subsets), 17)
    observed_sizes = sorted(len(members) for members in viewvalues(subsets))
    self.assertEqual(observed_sizes, expected_sizes)
def write(self, bioseqs_file):
    """
    Dump the content of the BioSeqs object to disk.

    The sequences held in 'self.data' are written to 'bioseqs_file' in
    GENBANK format. A companion report (number of sequences followed by
    the history entries) is written to the same path with its extension
    replaced by ".rep". A relative 'bioseqs_file' is resolved against the
    current working directory. Existing files are overwritten without
    warning.

    Arguments :
        bioseqs_file  ( string )
            New BioSeqs sequence file.

    Raises :
        IOError
            If the path provided doesn't exist.
    """
    data_filepath = get_abspath(bioseqs_file)
    base_path, _ = os.path.splitext(data_filepath)
    report_filepath = base_path + ".rep"

    # One line per report entry, fields of an entry separated by spaces
    history_lines = [" ".join(entry) for entry in self._report]
    str_report = "\n".join(history_lines)

    try:
        SeqIO.write(viewvalues(self.data), data_filepath, "genbank")
        with open(report_filepath, "w") as report_file:
            content = "Num. sequences: {:d}\nHistory:\n{:s}".format(
                len(self), str_report
            )
            report_file.write(content)
    except IOError:
        # I/O errors are propagated as they are, without any cleanup
        raise
    except:
        # Deliberately bare: whatever else interrupts the writing, remove
        # the (possibly partial) output files before re-raising
        for path in (data_filepath, report_filepath):
            if os.path.lexists(path):
                os.remove(path)
        raise
def write(self, bioseqs_file):
    """
    Store every sequence of the BioSeqs object in 'bioseqs_file' using
    the GENBANK format, together with a report file.

    The report file shares the path of 'bioseqs_file' but with the
    extension replaced by ".rep"; it lists the number of sequences and the
    history entries. Relative paths are resolved from the current working
    directory, and files already present are overwritten without warning.

    Arguments:
        bioseqs_file  (string)
            New BioSeqs sequence file.

    Raises:
        IOError
            If the path provided doesn't exist.
    """
    data_filepath = get_abspath(bioseqs_file)
    report_filepath = '{}{}'.format(os.path.splitext(data_filepath)[0], '.rep')

    # Flatten the report: space-separated fields, one entry per line
    str_report = '\n'.join(' '.join(fields) for fields in self._report)
    template = 'Num. sequences: {:d}\nHistory:\n{:s}'

    try:
        SeqIO.write(viewvalues(self.data), data_filepath, 'genbank')
        with open(report_filepath, 'w') as report_file:
            report_file.write(template.format(len(self), str_report))
    except IOError:
        # Let I/O errors through untouched (no file removal)
        raise
    except:
        # Intentionally bare: on any other interruption, delete whatever
        # was written so no partial output is left, then re-raise
        if os.path.lexists(data_filepath):
            os.remove(data_filepath)
        if os.path.lexists(report_filepath):
            os.remove(report_filepath)
        raise
def test_clustering(self):
    """
    Verify the PRD method on a 100-sequence FASTA file and its guide tree.

    With a subset size of 25 and an overlapping of 4, the division must
    contain 17 subsets whose sorted sizes match the reference list.
    """
    infile, informat = 'Fasta/f007.fasta', 'fasta'
    treefile, treeformat = 'Newick/f007.newick', 'newick'

    # Validate the fixtures
    self.assertTrue(os.path.isfile(infile))
    num_sequences = len(list(SeqIO.parse(infile, informat)))
    self.assertEqual(num_sequences, 100)
    self.assertTrue(os.path.isfile(treefile))

    # Build the subset division
    subset_dict = Cluster.get_subsets(
        'prd', infile, informat,
        tree_file=treefile, file_format=treeformat,
        subset_size=25, overlapping=4, binary=dcm3_exe,
    )

    # Validate the number of subsets and their size distribution
    self.assertEqual(len(subset_dict), 17)
    sizes = sorted(len(subset) for subset in viewvalues(subset_dict))
    reference = [16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22, 22,
                 23, 24, 25]
    self.assertEqual(sizes, reference)
def test_clustering(self):
    """
    Check the subset division produced by the naive rows method.

    The input FASTA file must hold 50 sequences; the division requested
    with the value 5 is expected to return 5 subsets of 10 sequences each.
    """
    fasta_path = "Fasta/f001.fasta"
    fasta_format = "fasta"

    # Sanity checks on the fixture
    self.assertTrue(os.path.isfile(fasta_path))
    records = list(SeqIO.parse(fasta_path, fasta_format))
    self.assertEqual(len(records), 50)

    # Run the rows subset division
    subsets = Cluster.get_subsets("rows", fasta_path, fasta_format, 5)

    # Every subset must have the same size
    self.assertEqual(len(subsets), 5)
    for members in viewvalues(subsets):
        self.assertEqual(len(members), 10)
def test_clustering(self):
    """
    Verify the naive rows method on a 50-sequence FASTA file.

    The resulting division must contain 5 subsets, each one holding
    exactly 10 sequences.
    """
    infile, informat = 'Fasta/f001.fasta', 'fasta'

    # Validate the fixture
    self.assertTrue(os.path.isfile(infile))
    num_sequences = len(list(SeqIO.parse(infile, informat)))
    self.assertEqual(num_sequences, 50)

    # Build the subset division
    subset_dict = Cluster.get_subsets('rows', infile, informat, 5)

    # Validate the number of subsets and the size of each one
    self.assertEqual(len(subset_dict), 5)
    for group in viewvalues(subset_dict):
        self.assertEqual(len(group), 10)