def test_cmalign_from_alignment(self):
    """cmalign_from_alignment aligns sequences against a template alignment.

    Exercises four calls: one with include_aln=False, two with the default
    arguments, and one with return_stdout=True.
    """
    # Keyword arguments common to every call in this test.
    shared_kwargs = dict(aln=self.seqs2_aligned,
                         structure_string=self.struct2_aligned_string,
                         seqs=self.seqs1_unaligned_gaps,
                         moltype=RNA)

    # Case 1: leave the template alignment out of the result.
    result_aln, result_struct = cmalign_from_alignment(
        include_aln=False, **shared_kwargs)
    # Only the newly aligned sequences are expected back.
    self.assertEqual(result_aln.todict(), self.seqs1_aligned)
    # The structure is compared after conversion from WUSS to Vienna.
    self.assertEqual(wuss_to_vienna(str(result_struct)),
                     self.seqs1_aligned_struct_string)

    # Case 2: gapped input sequences. This needs explicit coverage because
    # (per the original author's note) cmalign segfaults when the sequences
    # to be aligned contain gaps.
    result_aln, result_struct = cmalign_from_alignment(**shared_kwargs)
    self.assertEqual(result_aln.todict(), self.seqs1_and_seqs2_aligned)
    self.assertEqual(wuss_to_vienna(str(result_struct)),
                     self.seqs1_and_seqs2_aligned_struct_string)

    # Case 3: intended to cover ungapped input sequences.
    # NOTE(review): this call passes the same gapped fixture as case 2;
    # confirm whether an ungapped fixture was meant to be used here.
    result_aln, result_struct = cmalign_from_alignment(**shared_kwargs)
    self.assertEqual(result_aln.todict(), self.seqs1_and_seqs2_aligned)
    self.assertEqual(wuss_to_vienna(str(result_struct)),
                     self.seqs1_and_seqs2_aligned_struct_string)

    # Case 4: standard output is returned as a third value on request.
    result_aln, result_struct, captured_stdout = cmalign_from_alignment(
        return_stdout=True, **shared_kwargs)
    # Only the number of lines is compared against the reference output.
    observed_line_count = len(captured_stdout.split('\n'))
    self.assertEqual(observed_line_count,
                     len(CMALIGN_STDOUT.split('\n')))
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None, cmbuild_params=None, cmalign_params=None):
    """Align the sequences in seq_path to the template using Infernal.

    seq_path: path to a FASTA file holding the candidate sequences.
    result_path: when given, the aligned candidates are written to this
        path as FASTA and None is returned; when None, the resulting
        Alignment object is returned instead.
    log_path: when given, the cmbuild and cmalign parameters that were
        used are written to this path.
    failure_path: not used by this method.
    cmbuild_params: optional dict of cmbuild parameters. '--gapthresh' is
        always set to 1.0 on top of whatever is supplied.
    cmalign_params: optional dict of cmalign parameters. '--sub' is always
        set to True and '--gapthresh' to 1.0 on top of whatever is
        supplied.

    The parameter dicts passed by the caller are copied, never modified.

    Raises ValueError when parsing the template file
    (self.Params['template_filepath']) raises RecordError.
    """
    log_params = []

    # Load candidate sequences. The parser output is fully consumed by
    # dict() before the file is closed.
    with open(seq_path, 'U') as seq_file:
        candidate_sequences = dict(parse_fasta(seq_file))

    # Load the template alignment and its structure annotation. Only the
    # first record yielded by the parser is used.
    try:
        with open(self.Params['template_filepath'], 'U') as template_file:
            _info, template_alignment, struct = list(MinimalRfamParser(
                template_file, seq_constructor=ChangedSequence))[0]
    except RecordError:
        raise ValueError(
            "Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner.")

    # The unaligned sequences get temporary integer-based ids
    # ('unaligned_' prefix); new_to_old_ids maps them back afterwards.
    unaligned = SequenceCollection.from_fasta_records(
        candidate_sequences.iteritems(), DNASequence)
    mapped_seqs, new_to_old_ids = unaligned.int_map(prefix='unaligned_')
    mapped_seq_tuples = [(k, str(v)) for k, v in mapped_seqs.iteritems()]

    # Turn on --gapthresh option in cmbuild to force alignment to full
    # model. Work on a copy so the caller's dict is left untouched.
    cmbuild_params = dict(cmbuild_params) if cmbuild_params is not None else {}
    cmbuild_params.update({'--gapthresh': 1.0})

    # Record cmbuild parameters.
    log_params.append('cmbuild parameters:')
    log_params.append(str(cmbuild_params))

    # Turn on --sub option in Infernal, since we know the unaligned
    # sequences are fragments. Also turn on --gapthresh to use the same
    # gapthresh as was used to build the model. Again, copy first.
    cmalign_params = dict(cmalign_params) if cmalign_params is not None else {}
    cmalign_params.update({'--sub': True, '--gapthresh': 1.0})

    # Record cmalign parameters.
    log_params.append('cmalign parameters:')
    log_params.append(str(cmalign_params))

    # Align sequences to alignment including alignment gaps.
    aligned, struct_string = cmalign_from_alignment(
        aln=template_alignment,
        structure_string=struct,
        seqs=mapped_seq_tuples,
        include_aln=True,
        params=cmalign_params,
        cmbuild_params=cmbuild_params)

    # Pull the candidate sequences out of the full alignment, restoring
    # their original ids (note that `aligned` is a cogent alignment
    # object, hence the use of NamedSeqs).
    aligned_dict = aligned.NamedSeqs
    infernal_aligned = [(old_id, aligned_dict[new_id])
                        for new_id, old_id in new_to_old_ids.iteritems()]

    # Create an Alignment object from the (id, sequence) pairs.
    infernal_aligned = Alignment.from_fasta_records(infernal_aligned,
                                                    DNASequence)

    if log_path is not None:
        with open(log_path, 'w') as log_file:
            log_file.write('\n'.join(log_params))

    if result_path is not None:
        with open(result_path, 'w') as result_file:
            result_file.write(infernal_aligned.to_fasta())
        return None

    # No result file requested: hand the alignment back to the caller.
    return infernal_aligned