예제 #1
0
    def test_cmalign_from_alignment(self):
        """cmalign_from_alignment should align seqs against a template.

        Exercises four calls that share the same template alignment,
        template structure and query sequences:
          1. include_aln=False, so only the query sequences are compared.
          2. default include_aln, compared against the combined fixture.
          3. a repeat of call 2 (see the review note below).
          4. return_stdout=True, which yields a third return value.
        """
        template_aln = self.seqs2_aligned
        template_struct = self.struct2_aligned_string
        query_seqs = self.seqs1_unaligned_gaps

        # 1. Align without carrying the original alignment into the result.
        result_aln, result_struct = cmalign_from_alignment(
            aln=template_aln,
            structure_string=template_struct,
            seqs=query_seqs,
            moltype=RNA,
            include_aln=False)
        self.assertEqual(result_aln.todict(), self.seqs1_aligned)
        self.assertEqual(
            wuss_to_vienna(str(result_struct)),
            self.seqs1_aligned_struct_string)

        # 2. Gapped query sequences must be handled, since cmalign
        #    segfaults when the sequences to be aligned contain gaps.
        result_aln, result_struct = cmalign_from_alignment(
            aln=template_aln,
            structure_string=template_struct,
            seqs=query_seqs,
            moltype=RNA)
        self.assertEqual(result_aln.todict(), self.seqs1_and_seqs2_aligned)
        self.assertEqual(
            wuss_to_vienna(str(result_struct)),
            self.seqs1_and_seqs2_aligned_struct_string)

        # 3. Labelled "should work with ungapped seqs" in the original.
        # NOTE(review): this call passes the same gapped fixture as case 2,
        # so it does not actually cover ungapped input -- confirm whether an
        # ungapped fixture was intended here.
        result_aln, result_struct = cmalign_from_alignment(
            aln=template_aln,
            structure_string=template_struct,
            seqs=query_seqs,
            moltype=RNA)
        self.assertEqual(result_aln.todict(), self.seqs1_and_seqs2_aligned)
        self.assertEqual(
            wuss_to_vienna(str(result_struct)),
            self.seqs1_and_seqs2_aligned_struct_string)

        # 4. With return_stdout=True the captured standard out is returned
        #    as well; only its line count is compared with the expectation.
        result_aln, result_struct, captured_stdout = cmalign_from_alignment(
            aln=template_aln,
            structure_string=template_struct,
            seqs=query_seqs,
            moltype=RNA,
            return_stdout=True)
        observed_lines = captured_stdout.split('\n')
        expected_lines = CMALIGN_STDOUT.split('\n')
        self.assertEqual(len(observed_lines), len(expected_lines))
예제 #2
0
 def __call__(self, seq_path, result_path=None, log_path=None,
     failure_path=None, cmbuild_params=None, cmalign_params=None):
     """Align the sequences in seq_path to the template alignment.

     seq_path: path to a FASTA file of candidate (unaligned) sequences.
     result_path: if given, the aligned candidate sequences are written
      there as FASTA and None is returned; otherwise the Alignment
      object is returned.
     log_path: if given, the cmbuild and cmalign parameters that were
      used are written to this file.
     failure_path: accepted for interface compatibility; this method
      does not use it.
     cmbuild_params: optional dict of extra cmbuild parameters. It is
      copied, and '--gapthresh' is always set to 1.0.
     cmalign_params: optional dict of extra cmalign parameters. It is
      copied, and '--sub' (True) and '--gapthresh' (1.0) are always set.

     Raises ValueError if parsing the template file (taken from
     self.Params['template_filepath']) raises RecordError.
     """
     log_params = []

     # Load candidate sequences; the parser output is fully consumed
     # inside the with-block so the handle can be closed right away.
     with open(seq_path, 'U') as seq_file:
         candidate_sequences = dict(MinimalFastaParser(seq_file))

     # Load the template alignment and its structure annotation (first
     # record of the template file only).
     try:
         with open(self.Params['template_filepath'], 'U') as template_file:
             _info, template_alignment, struct = list(MinimalRfamParser(
                 template_file, seq_constructor=ChangedSequence))[0]
     except RecordError:
         raise ValueError(
             "Template alignment must be in Stockholm format with "
             "corresponding secondary structure annotation when using "
             "InfernalAligner.")

     moltype = self.Params['moltype']

     # Need to make a separate mapping for the unaligned sequences:
     # they are renamed with an 'unaligned_' prefix and int_keys maps
     # the temporary names back to the original ones.
     unaligned = SequenceCollection(candidate_sequences, MolType=moltype)
     int_map, int_keys = unaligned.getIntMap(prefix='unaligned_')
     int_map = SequenceCollection(int_map, MolType=moltype)

     # Work on copies so the caller's dicts are never modified.
     # Turn on --gapthresh in cmbuild to force alignment to the full model.
     if cmbuild_params is None:
         cmbuild_params = {}
     else:
         cmbuild_params = dict(cmbuild_params)
     cmbuild_params['--gapthresh'] = 1.0

     # Record cmbuild parameters.
     log_params.append('cmbuild parameters:')
     log_params.append(str(cmbuild_params))

     # Turn on --sub in Infernal, since we know the unaligned sequences
     # are fragments. Also turn on --gapthresh to use the same gapthresh
     # as was used to build the model.
     if cmalign_params is None:
         cmalign_params = {}
     else:
         cmalign_params = dict(cmalign_params)
     cmalign_params['--sub'] = True
     cmalign_params['--gapthresh'] = 1.0

     # Record cmalign parameters.
     log_params.append('cmalign parameters:')
     log_params.append(str(cmalign_params))

     # Align sequences to the template, keeping the template sequences
     # (and therefore the alignment gaps) in the result.
     aligned, struct_string = cmalign_from_alignment(
         aln=template_alignment,
         structure_string=struct,
         seqs=int_map,
         moltype=moltype,
         include_aln=True,
         params=cmalign_params,
         cmbuild_params=cmbuild_params)

     # Pull the candidate sequences back out of the full alignment,
     # restoring their original names.
     infernal_aligned = {}
     aligned_dict = aligned.NamedSeqs
     for key in int_map.Names:
         infernal_aligned[int_keys.get(key, key)] = aligned_dict[key]

     # Create an Alignment object from the alignment dict.
     infernal_aligned = Alignment(infernal_aligned, MolType=moltype)

     if log_path is not None:
         with open(log_path, 'w') as log_file:
             log_file.write('\n'.join(log_params))

     if result_path is not None:
         with open(result_path, 'w') as result_file:
             result_file.write(infernal_aligned.toFasta())
         return None

     # The original wrapped this return in try/except ValueError, but
     # returning an already-built object cannot raise, so that handler
     # was unreachable and has been dropped.
     return infernal_aligned