Ejemplo n.º 1
0
    def test_MinimalRfamParser_strict_invalid_structure(self):
        """MinimalRfamParser: toggle strict functions w/ invalid structure
        """
        #strict = True
        self.assertRaises(RecordError,list,\
            MinimalRfamParser(self._fake_record_bad_structure_1))

        # strict = False
        self.assertEqual(list(MinimalRfamParser(\
            self._fake_record_bad_structure_1,strict=False))[0][2],None)                                
Ejemplo n.º 2
0
    def test_MinimalRfamParser_strict_invalid_sequence(self):
        """MinimalRfamParser: toggle strict functions w/ invalid seq
        """
        #strict = True
        self.assertRaises(RecordError,list,\
            MinimalRfamParser(self._fake_record_bad_sequence_1))

        # strict = False
        # you expect to get back as much information as possible, also
        # half records or sequences
        result = MinimalRfamParser(self._fake_record_bad_sequence_1,strict=False)
        self.assertEqual(len(list(MinimalRfamParser(\
            self._fake_record_bad_sequence_1,strict=False))[0][1].NamedSeqs), 3)            
Ejemplo n.º 3
0
    def test_MinimalRfamParser_w_valid_data(self):
        """MinimalRfamParser: integrity of output """

        # Some ugly constructions here, but this is what the output of
        # parsing fake_two_records should be
        headers = ['#=GF AC   RF00014','#=GF AU   Mifsud W']
        sequences =\
        {'U17136.1/898-984':\
        ''.join(['AACACAUCAGAUUUCCUGGUGUAACGAAUUUUUUAAGUGCUUCUUGCUUA',\
            'AGCAAGUUUCAUCCCGACCCCCUCAGGGUCGGGAUUU']),\
        'M15749.1/155-239':\
        ''.join(['AACGCAUCGGAUUUCCCGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUU',\
            'AGCAAGUUUGAUCCCGACUCCUG-CGAGUCGGGAUUU']),\
        'AF090431.1/222-139':\
        ''.join(['CUCACAUCAGAUUUCCUGGUGUAACGAA-UUUUCAAGUGCUUCUUGCAUA',\
            'AGCAAGUUUGAUCCCGACCCGU--AGGGCCGGGAUUU'])}

        structure = WussStructure(''.join(\
        ['...<<<<<<<.....>>>>>>>....................<<<<<...',\
        '.>>>>>....<<<<<<<<<<.....>>>>>>>>>>..']))
        
        data = []
        for r in MinimalRfamParser(self._fake_two_records, strict=False):
            data.append(r)
        self.assertEqual(data[0],(headers,sequences,structure))
        assert isinstance(data[0][1],Alignment)

        # This line tests that invalid entries are ignored when strict=False
        # Note, there are two records in self._fake_two_records, but 2nd is
        # invalid
        self.assertEqual(len(data),1)            
Ejemplo n.º 4
0
    def test_MinimalRfamParser_strict_missing_fields(self):
        """MinimalRfamParser: toggle strict functions w/ missing fields"""
        # strict = True
        
        self.assertRaises(RecordError,list,\
            MinimalRfamParser(self._fake_record_no_sequences))
        
        self.assertRaises(RecordError,list,\
            MinimalRfamParser(self._fake_record_no_structure))

        # strict = False
        # no header shouldn't be a problem
        self.assertEqual(list(MinimalRfamParser(self._fake_record_no_headers,\
            strict=False)), [([],{'Z11765.1/1-89':'GGUC'},'............>>>')])
        # should get empty on missing sequence or missing structure
        self.assertEqual(list(MinimalRfamParser(self._fake_record_no_sequences,\
            strict=False)), [])
        self.assertEqual(list(MinimalRfamParser(self._fake_record_no_structure,\
            strict=False)), [])
Ejemplo n.º 5
0
 def test_RfamParser_strict_invalid_sequences(self):
     """RfamParser: functions when toggling strict w/ record w/ bad seq
     """
     self.assertRaises(RecordError,list,
         MinimalRfamParser(self._fake_record_bad_sequence_1))
         
     # strict = False
     # in 'False' mode you expect to get back as much as possible, also
     # parts of sequences
     self.assertEqual(len(list(RfamParser(self._fake_record_bad_sequence_1,\
         strict=False))[0][1].NamedSeqs), 3)           
Ejemplo n.º 6
0
 def __call__(self, seq_path, result_path=None, log_path=None, \
     failure_path=None, cmbuild_params=None, cmalign_params=None):
     
     log_params = []
     # load candidate sequences
     candidate_sequences = dict(MinimalFastaParser(open(seq_path,'U')))
     
     # load template sequences
     try:
         info, template_alignment, struct = list(MinimalRfamParser(open(\
             self.Params['template_filepath'],'U'),\
             seq_constructor=ChangedSequence))[0]
     except RecordError:
         raise ValueError, "Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner."
     
     moltype = self.Params['moltype']
     
     #Need to make separate mapping for unaligned sequences
     unaligned = SequenceCollection(candidate_sequences,MolType=moltype)
     int_map, int_keys = unaligned.getIntMap(prefix='unaligned_')
     int_map = SequenceCollection(int_map,MolType=moltype)
     
     #Turn on --gapthresh option in cmbuild to force alignment to full model
     if cmbuild_params is None:
         cmbuild_params = {}
     cmbuild_params.update({'--gapthresh':1.0})
     
     #record cmbuild parameters
     log_params.append('cmbuild parameters:')
     log_params.append(str(cmbuild_params))
     
     #Turn on --sub option in Infernal, since we know the unaligned sequences
     # are fragments.
     #Also turn on --gapthresh to use same gapthresh as was used to build
     # model
     
     if cmalign_params is None:
         cmalign_params = {}
     cmalign_params.update({'--sub':True,'--gapthresh':1.0})
     
     #record cmalign parameters
     log_params.append('cmalign parameters:')
     log_params.append(str(cmalign_params))
     
     #Align sequences to alignment including alignment gaps.
     aligned, struct_string = cmalign_from_alignment(aln=template_alignment,\
         structure_string=struct,\
         seqs=int_map,\
         moltype=moltype,\
         include_aln=True,\
         params=cmalign_params,\
         cmbuild_params=cmbuild_params)
     
     #Pull out original sequences from full alignment.
     infernal_aligned={}
     aligned_dict = aligned.NamedSeqs
     for key in int_map.Names:
         infernal_aligned[int_keys.get(key,key)]=aligned_dict[key]
     
     #Create an Alignment object from alignment dict
     infernal_aligned = Alignment(infernal_aligned,MolType=moltype)
     
     if log_path is not None:
         log_file = open(log_path,'w')
         log_file.write('\n'.join(log_params))
         log_file.close()
     
     if result_path is not None:
         result_file = open(result_path,'w')
         result_file.write(infernal_aligned.toFasta())
         result_file.close()
         return None
     else:
         try:
             return infernal_aligned
         except ValueError:
             return {}
Ejemplo n.º 7
0
    def __call__(self, seq_path, result_path=None, log_path=None,
                 failure_path=None, cmbuild_params=None, cmalign_params=None):

        log_params = []
        # load candidate sequences
        candidate_sequences = dict(parse_fasta(open(seq_path, 'U')))

        # load template sequences
        try:
            info, template_alignment, struct = list(MinimalRfamParser(open(
                self.Params['template_filepath'], 'U'),
                seq_constructor=ChangedSequence))[0]
        except RecordError:
            raise ValueError(
                "Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner.")

        # Need to make separate mapping for unaligned sequences
        unaligned = SequenceCollection.from_fasta_records(
            candidate_sequences.iteritems(), DNASequence)
        mapped_seqs, new_to_old_ids = unaligned.int_map(prefix='unaligned_')
        mapped_seq_tuples = [(k, str(v)) for k,v in mapped_seqs.iteritems()]

        # Turn on --gapthresh option in cmbuild to force alignment to full
        # model
        if cmbuild_params is None:
            cmbuild_params = {}
        cmbuild_params.update({'--gapthresh': 1.0})

        # record cmbuild parameters
        log_params.append('cmbuild parameters:')
        log_params.append(str(cmbuild_params))

        # Turn on --sub option in Infernal, since we know the unaligned sequences
        # are fragments.
        # Also turn on --gapthresh to use same gapthresh as was used to build
        # model
        if cmalign_params is None:
            cmalign_params = {}
        cmalign_params.update({'--sub': True, '--gapthresh': 1.0})

        # record cmalign parameters
        log_params.append('cmalign parameters:')
        log_params.append(str(cmalign_params))

        # Align sequences to alignment including alignment gaps.
        aligned, struct_string = cmalign_from_alignment(aln=template_alignment,
                                                        structure_string=struct,
                                                        seqs=mapped_seq_tuples,
                                                        include_aln=True,
                                                        params=cmalign_params,
                                                        cmbuild_params=cmbuild_params)

        # Pull out original sequences from full alignment.
        infernal_aligned = []
        # Get a dict of the ids to sequences (note that this is a
        # cogent alignment object, hence the call to NamedSeqs)
        aligned_dict = aligned.NamedSeqs
        for n, o in new_to_old_ids.iteritems():
            aligned_seq = aligned_dict[n]
            infernal_aligned.append((o, aligned_seq))

        # Create an Alignment object from alignment dict
        infernal_aligned = Alignment.from_fasta_records(infernal_aligned, DNASequence)

        if log_path is not None:
            log_file = open(log_path, 'w')
            log_file.write('\n'.join(log_params))
            log_file.close()

        if result_path is not None:
            result_file = open(result_path, 'w')
            result_file.write(infernal_aligned.to_fasta())
            result_file.close()
            return None
        else:
            try:
                return infernal_aligned
            except ValueError:
                return {}