def read(self, input_handle):
    """Read patterns from the specified handle.

    Each line of the handle holds one pattern. Trailing whitespace
    (including the newline) is stripped. A line containing
    ``self.separator`` is treated as a signature and split on the
    separator into a tuple of strings; any other line is kept as a
    plain string.

    When ``self._alphabet`` is not None, every pattern (or every part
    of a signature) is wrapped in a ``Seq`` with that alphabet and
    checked with ``utils.verify_alphabet``.

    Arguments:
     - input_handle - An object with a ``readline()`` method; reading
       stops at the first empty result.

    Returns a list of patterns (strings and/or tuples of strings) in
    the order they were read.

    Raises ValueError if a pattern fails the alphabet check.
    """
    all_patterns = []

    while True:
        cur_line = input_handle.readline()

        if not cur_line:
            break

        # use the str method: the function form string.rstrip() only
        # exists in the Python 2 ``string`` module
        cur_pattern = cur_line.rstrip()

        # split up signatures
        if self.separator in cur_pattern:
            cur_pattern = tuple(cur_pattern.split(self.separator))

        if self._alphabet is not None:
            # make single patterns (not signatures) into lists, so we
            # can check signatures and single patterns the same
            if isinstance(cur_pattern, tuple):
                test_pattern = cur_pattern
            else:
                test_pattern = [cur_pattern]

            for pattern_item in test_pattern:
                pattern_seq = Seq(pattern_item, self._alphabet)
                if not utils.verify_alphabet(pattern_seq):
                    raise ValueError("Pattern %s not matching alphabet %s"
                                     % (cur_pattern, self._alphabet))

        all_patterns.append(cur_pattern)

    return all_patterns
def read(self, input_handle):
    """Load the patterns stored in a handle, one pattern per line.

    Lines are read with ``readline()`` until an empty result comes
    back. Each line has its trailing whitespace removed; a line that
    contains ``self.separator`` becomes a tuple of its separated parts
    (a signature), any other line stays a plain string.

    If ``self._alphabet`` is set, each pattern -- or each part of a
    signature -- is checked against it via ``utils.verify_alphabet``
    and a ValueError is raised on the first one that does not match.

    Returns the patterns as a list, in file order.
    """
    collected = []

    line = input_handle.readline()
    while line:
        entry = line.rstrip()

        # a separator marks a signature: keep its parts as a tuple
        if self.separator in entry:
            entry = tuple(entry.split(self.separator))

        if self._alphabet is not None:
            # treat a lone pattern as a one-item signature so both forms
            # go through the same check
            parts = entry if isinstance(entry, tuple) else [entry]
            for part in parts:
                if not utils.verify_alphabet(Seq(part, self._alphabet)):
                    raise ValueError("Pattern %s not matching alphabet %s"
                                     % (entry, self._alphabet))

        collected.append(entry)
        line = input_handle.readline()

    return collected
def _get_signature_dict(self, seq_records, sig_size, max_gap):
    """Return a dictionary with all signatures and their counts.

    This internal function does all of the hard work for the
    find_signatures function.

    For every record, each start position yields a first part
    ``seq[start:start + sig_size]``, which is paired with each second
    part ``seq[second:second + sig_size]`` for ``second`` running from
    ``start + 1`` up to ``start + max_gap``. Each pair is handed to
    ``self._add_sig`` as a ``(first, second)`` tuple.

    Arguments:
     - seq_records - An indexable sequence of records exposing
       ``.seq``; may be empty.
     - sig_size - Length of each of the two parts of a signature.
     - max_gap - Number of second-part offsets tried per start.

    Returns the dictionary built up by ``self._add_sig`` (empty if
    there were no records or no signatures).

    When ``self._alphabet_strict`` is true, the alphabet of the first
    record is used: every record must share it (checked with an
    assert) and only signatures whose parts both pass
    ``utils.verify_alphabet`` are added.
    """
    # with no records there is nothing to scan, and no first record
    # to take the alphabet from
    if not seq_records:
        return {}

    if self._alphabet_strict:
        alphabet = seq_records[0].seq.alphabet
    else:
        alphabet = None

    # a start position must leave room for both parts plus the gap;
    # this does not depend on the record, so compute it once
    largest_sig_size = sig_size * 2 + max_gap

    # loop through all records to find signatures
    all_sigs = {}
    for seq_record in seq_records:
        # if we are working with alphabets, make sure we are consistent
        if alphabet is not None:
            assert seq_record.seq.alphabet == alphabet, \
                "Working with alphabet %s and got %s" % \
                (alphabet, seq_record.seq.alphabet)

        # now start finding signatures in the sequence
        for start in range(len(seq_record.seq) - (largest_sig_size - 1)):
            # find the first part of the signature
            first_sig = seq_record.seq[start:start + sig_size].data

            # now find all of the second parts of the signature
            # NOTE(review): second parts begin at start + 1, so for
            # sig_size > 1 they overlap the first part -- confirm this
            # is intended rather than starting at start + sig_size
            for second in range(start + 1, (start + 1) + max_gap):
                second_sig = seq_record.seq[second:second + sig_size].data

                # if we are being alphabet strict, make sure both parts
                # of the sig fall within the specified alphabet
                if alphabet is not None:
                    first_seq = Seq(first_sig, alphabet)
                    second_seq = Seq(second_sig, alphabet)
                    if not (utils.verify_alphabet(first_seq) and
                            utils.verify_alphabet(second_seq)):
                        continue

                all_sigs = self._add_sig(all_sigs, (first_sig, second_sig))

    return all_sigs
def _get_signature_dict(self, seq_records, sig_size, max_gap):
    """Return a dictionary with all signatures and their counts.

    This internal function does all of the hard work for the
    find_signatures function.

    A signature is a ``(first, second)`` tuple of two slices of a
    record's sequence, each ``sig_size`` long. For a given ``start``
    the first slice begins at ``start`` and the second slice begins at
    each of the ``max_gap`` positions ``start + 1`` ..
    ``start + max_gap``. Every tuple is passed to ``self._add_sig``
    together with the dictionary built so far.

    Arguments:
     - seq_records - An indexable sequence of records with a ``.seq``
       attribute; an empty sequence gives an empty dictionary.
     - sig_size - Length of each half of a signature.
     - max_gap - How many second-half positions are tried per start.

    Returns the dictionary produced by the ``self._add_sig`` calls.

    If ``self._alphabet_strict`` is true, the first record's alphabet
    is the reference: all records are asserted to use it, and a
    signature is only added when both halves pass
    ``utils.verify_alphabet``.
    """
    # nothing to scan; this also avoids indexing an empty sequence
    # when picking the reference alphabet below
    if not seq_records:
        return {}

    alphabet = seq_records[0].seq.alphabet if self._alphabet_strict else None

    # room needed after a start position: both halves plus the gap
    # (the same for every record, so computed once)
    largest_sig_size = sig_size * 2 + max_gap

    all_sigs = {}
    for seq_record in seq_records:
        sequence = seq_record.seq

        # if we are working with alphabets, make sure we are consistent
        if alphabet is not None:
            assert seq_record.seq.alphabet == alphabet, \
                "Working with alphabet %s and got %s" % \
                (alphabet, seq_record.seq.alphabet)

        for start in range(len(sequence) - (largest_sig_size - 1)):
            # first half of the signature
            first_sig = sequence[start:start + sig_size].data

            # NOTE(review): the second half starts at start + 1 and so
            # overlaps the first half when sig_size > 1 -- verify that
            # start + sig_size was not the intended lower bound
            for second in range(start + 1, (start + 1) + max_gap):
                second_sig = sequence[second:second + sig_size].data
                signature = (first_sig, second_sig)

                # when alphabet strict, skip signatures with a half
                # that falls outside the reference alphabet
                if alphabet is not None:
                    first_seq = Seq(first_sig, alphabet)
                    second_seq = Seq(second_sig, alphabet)
                    if not (utils.verify_alphabet(first_seq) and
                            utils.verify_alphabet(second_seq)):
                        continue

                all_sigs = self._add_sig(all_sigs, signature)

    return all_sigs
def _get_motif_dict(self, seq_records, motif_size):
    """Return a dictionary with information on motifs.

    This internal function essentially does all of the hard work for
    finding motifs, and returns a dictionary containing the found
    motifs and their counts. This is internal so it can be reused by
    find_motif_differences.

    Every window ``seq[start:start + motif_size]`` of every record is
    passed to ``self._add_motif`` together with the dictionary built
    so far.

    Arguments:
     - seq_records - An indexable sequence of records with a ``.seq``
       attribute; an empty sequence gives an empty dictionary.
     - motif_size - Length of the motifs to collect.

    Returns the dictionary produced by the ``self._add_motif`` calls.

    If ``self.alphabet_strict`` is true, the first record's alphabet
    is the reference: all records are asserted to use it, and a motif
    is only added when it passes ``utils.verify_alphabet``.
    """
    # nothing to scan; this also avoids indexing an empty sequence
    # when picking the reference alphabet below
    if not seq_records:
        return {}

    if self.alphabet_strict:
        alphabet = seq_records[0].seq.alphabet
    else:
        alphabet = None

    # loop through all records to find the motifs in the sequences
    all_motifs = {}
    for seq_record in seq_records:
        # if we are working with alphabets, make sure we are consistent
        if alphabet is not None:
            assert seq_record.seq.alphabet == alphabet, \
                "Working with alphabet %s and got %s" % \
                (alphabet, seq_record.seq.alphabet)

        # now start finding motifs in the sequence
        for start in range(len(seq_record.seq) - (motif_size - 1)):
            motif = seq_record.seq[start:start + motif_size].data

            # if we are being alphabet strict, make sure the motif
            # falls within the specified alphabet
            if alphabet is not None:
                motif_seq = Seq(motif, alphabet)
                if not utils.verify_alphabet(motif_seq):
                    continue

            all_motifs = self._add_motif(all_motifs, motif)

    return all_motifs