def __call__(self, seq_path, result_path=None, log_path=None, failure_path=None, cmbuild_params=None, cmalign_params=None): log_params = [] # load candidate sequences candidate_sequences = dict(parse_fasta(open(seq_path, 'U'))) # load template sequences try: info, template_alignment, struct = list(MinimalRfamParser(open( self.Params['template_filepath'], 'U'), seq_constructor=ChangedSequence))[0] except RecordError: raise ValueError( "Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner.") moltype = self.Params['moltype'] # Need to make separate mapping for unaligned sequences unaligned = SequenceCollection.from_fasta_records( candidate_sequences.iteritems(), DNASequence) mapped_seqs, new_to_old_ids = unaligned.int_map(prefix='unaligned_') mapped_seq_tuples = [(k, str(v)) for k,v in mapped_seqs.iteritems()] # Turn on --gapthresh option in cmbuild to force alignment to full # model if cmbuild_params is None: cmbuild_params = {} cmbuild_params.update({'--gapthresh': 1.0}) # record cmbuild parameters log_params.append('cmbuild parameters:') log_params.append(str(cmbuild_params)) # Turn on --sub option in Infernal, since we know the unaligned sequences # are fragments. # Also turn on --gapthresh to use same gapthresh as was used to build # model if cmalign_params is None: cmalign_params = {} cmalign_params.update({'--sub': True, '--gapthresh': 1.0}) # record cmalign parameters log_params.append('cmalign parameters:') log_params.append(str(cmalign_params)) # Align sequences to alignment including alignment gaps. aligned, struct_string = cmalign_from_alignment(aln=template_alignment, structure_string=struct, seqs=mapped_seq_tuples, moltype=moltype, include_aln=True, params=cmalign_params, cmbuild_params=cmbuild_params) # Pull out original sequences from full alignment. infernal_aligned = [] # Get a dict of the identifiers to sequences (note that this is a # cogent alignment object, hence the call to NamedSeqs) aligned_dict = aligned.NamedSeqs for n, o in new_to_old_ids.iteritems(): aligned_seq = aligned_dict[n] infernal_aligned.append((o, aligned_seq)) # Create an Alignment object from alignment dict infernal_aligned = Alignment.from_fasta_records(infernal_aligned, DNASequence) if log_path is not None: log_file = open(log_path, 'w') log_file.write('\n'.join(log_params)) log_file.close() if result_path is not None: result_file = open(result_path, 'w') result_file.write(infernal_aligned.to_fasta()) result_file.close() return None else: try: return infernal_aligned except ValueError: return {}
def __call__(self, seq_path, result_path=None, log_path=None, failure_path=None, cmbuild_params=None, cmalign_params=None): log_params = [] # load candidate sequences candidate_sequences = dict(parse_fasta(open(seq_path, 'U'))) # load template sequences try: info, template_alignment, struct = list(MinimalRfamParser(open( self.Params['template_filepath'], 'U'), seq_constructor=ChangedSequence))[0] except RecordError: raise ValueError( "Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner.") moltype = self.Params['moltype'] # Need to make separate mapping for unaligned sequences unaligned = SequenceCollection(candidate_sequences, MolType=moltype) int_map, int_keys = unaligned.getIntMap(prefix='unaligned_') int_map = SequenceCollection(int_map, MolType=moltype) # Turn on --gapthresh option in cmbuild to force alignment to full # model if cmbuild_params is None: cmbuild_params = {} cmbuild_params.update({'--gapthresh': 1.0}) # record cmbuild parameters log_params.append('cmbuild parameters:') log_params.append(str(cmbuild_params)) # Turn on --sub option in Infernal, since we know the unaligned sequences # are fragments. # Also turn on --gapthresh to use same gapthresh as was used to build # model if cmalign_params is None: cmalign_params = {} cmalign_params.update({'--sub': True, '--gapthresh': 1.0}) # record cmalign parameters log_params.append('cmalign parameters:') log_params.append(str(cmalign_params)) # Align sequences to alignment including alignment gaps. aligned, struct_string = cmalign_from_alignment(aln=template_alignment, structure_string=struct, seqs=int_map, moltype=moltype, include_aln=True, params=cmalign_params, cmbuild_params=cmbuild_params) # Pull out original sequences from full alignment. infernal_aligned = {} aligned_dict = aligned.NamedSeqs for key in int_map.Names: infernal_aligned[int_keys.get(key, key)] = aligned_dict[key] # Create an Alignment object from alignment dict infernal_aligned = Alignment(infernal_aligned, MolType=moltype) if log_path is not None: log_file = open(log_path, 'w') log_file.write('\n'.join(log_params)) log_file.close() if result_path is not None: result_file = open(result_path, 'w') result_file.write(infernal_aligned.toFasta()) result_file.close() return None else: try: return infernal_aligned except ValueError: return {}