def standard_test(self, informat, outformat, params):
    """
    Shared check: align the fixture file with MAFFT and compare the result
    against a previously stored reference alignment.

    Arguments :
        informat  ( string )
            Input file format. Also selects the fixture file, which is read
            from '<Informat>/f001.<informat>'.
        outformat  ( string )
            Output file format. Also selects the folder of the reference
            alignment, '<Outformat>/f001.mafft_<params>.aln'.
        params  ( string )
            Arguments passed to the alignment tool.
    """
    source_path = '{}/f001.{}'.format(informat.capitalize(), informat)
    result_path = 'tmp_test.aln'
    # Register the output file up front (presumably so that it is removed
    # once the test finishes, even if an assertion below fails)
    self.add_file_to_clean(result_path)

    # The fixture must exist and hold the expected 50 records
    self.assertTrue(os.path.isfile(source_path))
    input_records = list(SeqIO.parse(source_path, informat))
    self.assertEqual(len(input_records), 50)

    # Run the alignment tool, writing the result to 'result_path'
    Align.get_alignment(mafft_exe, source_path, informat, args=params,
                        outfile=result_path, outfile_format=outformat)

    # Load the freshly generated alignment, indexed by record id
    self.assertTrue(os.path.isfile(result_path))
    produced = SeqIO.to_dict(SeqIO.parse(result_path, outformat))

    # Load the reference alignment generated beforehand
    expected_path = '{}/f001.mafft_{}.aln'.format(outformat.capitalize(),
                                                  params)
    self.assertTrue(os.path.isfile(expected_path))
    expected = SeqIO.to_dict(SeqIO.parse(expected_path, outformat))

    # Same number of records, and every produced sequence must match the
    # reference sequence stored under the same id
    self.assertEqual(len(produced), len(expected))
    for seq_id, produced_record in viewitems(produced):
        self.assertEqual(str(produced_record.seq),
                         str(expected[seq_id].seq))
def standard_test(self, informat, outformat, params):
    """
    Shared check: align the fixture file with MUSCLE and compare the result
    against a previously stored reference alignment.

    Arguments :
        informat  ( string )
            Input file format. Also selects the fixture file, which is read
            from '<Informat>/f001.<informat>'.
        outformat  ( string )
            Output file format. Also selects the folder of the reference
            alignment, '<Outformat>/f001.muscle_<params>.aln'.
        params  ( string )
            Arguments passed to the alignment tool.
    """
    source_path = '{}/f001.{}'.format(informat.capitalize(), informat)
    result_path = 'tmp_test.aln'
    # Register the output file up front (presumably so that it is removed
    # once the test finishes, even if an assertion below fails)
    self.add_file_to_clean(result_path)

    # The fixture must exist and hold the expected 50 records
    self.assertTrue(os.path.isfile(source_path))
    input_records = list(SeqIO.parse(source_path, informat))
    self.assertEqual(len(input_records), 50)

    # Run the alignment tool, writing the result to 'result_path'
    Align.get_alignment(muscle_exe, source_path, informat, args=params,
                        outfile=result_path, outfile_format=outformat)

    # Load the freshly generated alignment, indexed by record id
    self.assertTrue(os.path.isfile(result_path))
    produced = SeqIO.to_dict(SeqIO.parse(result_path, outformat))

    # Load the reference alignment generated beforehand
    expected_path = '{}/f001.muscle_{}.aln'.format(outformat.capitalize(),
                                                   params)
    self.assertTrue(os.path.isfile(expected_path))
    expected = SeqIO.to_dict(SeqIO.parse(expected_path, outformat))

    # Same number of records, and every produced sequence must match the
    # reference sequence stored under the same id
    self.assertEqual(len(produced), len(expected))
    for seq_id, produced_record in viewitems(produced):
        self.assertEqual(str(produced_record.seq),
                         str(expected[seq_id].seq))
def test_simple_alignment(self):
    """
    Run the standard test, with FASTA as both input and output format, once
    for every keyword returned by 'Align.get_keywords' for the MAFFT
    executable.
    """
    configurations = Align.get_keywords(mafft_exe)
    for config_name in viewkeys(configurations):
        self.standard_test('fasta', 'fasta', config_name)
def test_simple_alignment(self):
    """
    Run the standard test, with FASTA as both input and output format, once
    for every keyword returned by 'Align.get_keywords' for the MUSCLE
    executable.
    """
    configurations = Align.get_keywords(muscle_exe)
    for config_name in viewkeys(configurations):
        self.standard_test('fasta', 'fasta', config_name)
def _normalization(record, refseq_record, alignment_bin):
    """
    Normalization of the input sequence with the reference sequence.

    The normalization consists of aligning both sequences and removing those
    sites where a gap has been introduced in the reference sequence, making
    the features of the reference sequence applicable to the new sequence.
    It returns the new sequence and the reference sequence's features.

    Arguments :
        record  ( Bio.SeqRecord )
            Sequence to normalize.
        refseq_record  ( Bio.SeqRecord )
            Reference sequence of the same type as 'record'.
        alignment_bin  ( string )
            Binary path to the alignment tool that will be used in the
            normalization process.

    Returns :
        Bio.Seq
            Normalized sequence of 'record'.
        list
            List of SeqFeature objects (from Biopython) from reference
            sequence.

    Raises :
        RuntimeError
            If the call to the alignment tool command raises an exception
            (raised by 'Align.get_alignment', which is not visible here).
    """
    # The context manager guarantees that the temporary FASTA file is closed
    # (and therefore removed) as soon as it is no longer needed, even if the
    # alignment tool fails, instead of relying on garbage collection
    with tempfile.NamedTemporaryFile() as tmpfile:
        # The reference goes first, so it is the first sequence handed to
        # the alignment tool (row 0 below is treated as the reference)
        SeqIO.write([refseq_record, record], tmpfile.name, 'fasta')
        alignment = Align.get_alignment(alignment_bin, tmpfile.name, 'fasta')
        # Get the normalized sequence by removing the sites that correspond
        # to gaps introduced in the reference sequence during the alignment
        # process. This is done while the temporary file still exists.
        aligned_reference = alignment[0]
        record_seq = ''.join(
            site for i, site in enumerate(alignment[1])
            if aligned_reference[i] != '-')
    return (Seq(record_seq, refseq_record.seq.alphabet),
            refseq_record.features)
def _normalization(record, refseq_record, alignment_bin):
    """
    Normalization of the input sequence with the reference sequence.

    The normalization consists of aligning both sequences and removing those
    sites where a gap has been introduced in the reference sequence, making
    the features of the reference sequence applicable to the new sequence.
    It returns the new sequence and the reference sequence's features.

    Arguments :
        record  ( Bio.SeqRecord )
            Sequence to normalize.
        refseq_record  ( Bio.SeqRecord )
            Reference sequence of the same type as 'record'.
        alignment_bin  ( string )
            Binary path to the alignment tool that will be used in the
            normalization process.

    Returns :
        Bio.Seq
            Normalized sequence of 'record'.
        list
            List of SeqFeature objects (from Biopython) from reference
            sequence.

    Raises :
        RuntimeError
            If the call to the alignment tool command raises an exception
            (raised by 'Align.get_alignment', which is not visible here).
    """
    # The context manager guarantees that the temporary FASTA file is closed
    # (and therefore removed) as soon as it is no longer needed, even if the
    # alignment tool fails, instead of relying on garbage collection
    with tempfile.NamedTemporaryFile() as tmpfile:
        # The reference goes first, so it is the first sequence handed to
        # the alignment tool (row 0 below is treated as the reference)
        SeqIO.write([refseq_record, record], tmpfile.name, 'fasta')
        alignment = Align.get_alignment(alignment_bin, tmpfile.name, 'fasta')
        # Get the normalized sequence by removing the sites that correspond
        # to gaps introduced in the reference sequence during the alignment
        # process. This is done while the temporary file still exists.
        aligned_reference = alignment[0]
        record_seq = ''.join(
            site for i, site in enumerate(alignment[1])
            if aligned_reference[i] != '-')
    return (Seq(record_seq, refseq_record.seq.alphabet),
            refseq_record.features)