Beispiel #1
0
    def read(self, input_handle):
        """Read patterns from the specified handle.
        """
        all_patterns = []

        while True:
            cur_line = input_handle.readline()

            if not (cur_line):
                break

            cur_pattern = cur_line.rstrip()
            # split up signatures
            if self.separator in cur_pattern:
                cur_pattern = tuple(cur_pattern.split(self.separator))

            if self._alphabet is not None:
                # make single patterns (not signatures) into lists, so we
                # can check signatures and single patterns the same
                if not isinstance(cur_pattern, tuple):
                    test_pattern = [cur_pattern]
                else:
                    test_pattern = cur_pattern
                for pattern_item in test_pattern:
                    pattern_seq = Seq(pattern_item, self._alphabet)
                    if not (_verify_alphabet(pattern_seq)):
                        raise ValueError(
                            "Pattern %s not matching alphabet %s" %
                            (cur_pattern, self._alphabet))

            all_patterns.append(cur_pattern)

        return all_patterns
Beispiel #2
0
    def read(self, input_handle):
        """Read patterns from the specified handle.
        """
        all_patterns = []

        while True:
            cur_line = input_handle.readline()

            if not(cur_line):
                break

            cur_pattern = cur_line.rstrip()
            # split up signatures
            if self.separator in cur_pattern:
                cur_pattern = tuple(cur_pattern.split(self.separator))

            if self._alphabet is not None:
                # make single patterns (not signatures) into lists, so we
                # can check signatures and single patterns the same
                if not isinstance(cur_pattern, tuple):
                    test_pattern = [cur_pattern]
                else:
                    test_pattern = cur_pattern
                for pattern_item in test_pattern:
                    pattern_seq = Seq(pattern_item, self._alphabet)
                    if not(_verify_alphabet(pattern_seq)):
                        raise ValueError("Pattern %s not matching alphabet %s"
                                         % (cur_pattern, self._alphabet))

            all_patterns.append(cur_pattern)

        return all_patterns
Beispiel #3
0
    def _get_signature_dict(self, seq_records, sig_size, max_gap):
        """Return a dictionary with all signatures and their counts.

        This internal function does all of the hard work for the
        find_signatures function.
        """
        if self._alphabet_strict:
            alphabet = seq_records[0].seq.alphabet
        else:
            alphabet = None

        # loop through all records to find signatures
        all_sigs = {}
        for seq_record in seq_records:
            # if we are working with alphabets, make sure we are consistent
            if alphabet is not None:
                assert seq_record.seq.alphabet == alphabet, \
                       "Working with alphabet %s and got %s" % \
                       (alphabet, seq_record.seq.alphabet)

            # now start finding signatures in the sequence
            largest_sig_size = sig_size * 2 + max_gap
            for start in range(len(seq_record.seq) - (largest_sig_size - 1)):
                # find the first part of the signature
                first_sig = str(seq_record.seq[start:start + sig_size])

                # now find all of the second parts of the signature
                for second in range(start + 1, (start + 1) + max_gap):
                    second_sig = str(seq_record.seq[second:second + sig_size])

                    # if we are being alphabet strict, make sure both parts
                    # of the sig fall within the specified alphabet
                    if alphabet is not None:
                        first_seq = Seq(first_sig, alphabet)
                        second_seq = Seq(second_sig, alphabet)
                        if _verify_alphabet(first_seq) \
                        and _verify_alphabet(second_seq):
                            all_sigs = self._add_sig(all_sigs,
                                                     (first_sig, second_sig))

                    # if we are not being strict, just add the motif
                    else:
                        all_sigs = self._add_sig(all_sigs,
                                                 (first_sig, second_sig))

        return all_sigs
Beispiel #4
0
    def _get_signature_dict(self, seq_records, sig_size, max_gap):
        """Return a dictionary with all signatures and their counts.

        This internal function does all of the hard work for the
        find_signatures function.
        """
        if self._alphabet_strict:
            alphabet = seq_records[0].seq.alphabet
        else:
            alphabet = None

        # loop through all records to find signatures
        all_sigs = {}
        for seq_record in seq_records:
            # if we are working with alphabets, make sure we are consistent
            if alphabet is not None:
                assert seq_record.seq.alphabet == alphabet, \
                       "Working with alphabet %s and got %s" % \
                       (alphabet, seq_record.seq.alphabet)

            # now start finding signatures in the sequence
            largest_sig_size = sig_size * 2 + max_gap
            for start in range(len(seq_record.seq) - (largest_sig_size - 1)):
                # find the first part of the signature
                first_sig = str(seq_record.seq[start:start + sig_size])

                # now find all of the second parts of the signature
                for second in range(start + 1, (start + 1) + max_gap):
                    second_sig = str(seq_record.seq[second: second + sig_size])

                    # if we are being alphabet strict, make sure both parts
                    # of the sig fall within the specified alphabet
                    if alphabet is not None:
                        first_seq = Seq(first_sig, alphabet)
                        second_seq = Seq(second_sig, alphabet)
                        if _verify_alphabet(first_seq) \
                        and _verify_alphabet(second_seq):
                            all_sigs = self._add_sig(all_sigs,
                                                     (first_sig, second_sig))

                    # if we are not being strict, just add the motif
                    else:
                        all_sigs = self._add_sig(all_sigs,
                                                 (first_sig, second_sig))

        return all_sigs
Beispiel #5
0
    def _get_motif_dict(self, seq_records, motif_size):
        """Return a dictionary with information on motifs.

        This internal function essentially does all of the hard work for
        finding motifs, and returns a dictionary containing the found motifs
        and their counts. This is internal so it can be reused by
        find_motif_differences.
        """
        if self.alphabet_strict:
            alphabet = seq_records[0].seq.alphabet
        else:
            alphabet = None

        # loop through all records to find the motifs in the sequences
        all_motifs = {}
        for seq_record in seq_records:
            # if we are working with alphabets, make sure we are consistent
            if alphabet is not None:
                assert seq_record.seq.alphabet == alphabet, \
                       "Working with alphabet %s and got %s" % \
                       (alphabet, seq_record.seq.alphabet)

            # now start finding motifs in the sequence
            for start in range(len(seq_record.seq) - (motif_size - 1)):
                motif = str(seq_record.seq[start:start + motif_size])

                # if we are being alphabet strict, make sure the motif
                # falls within the specified alphabet
                if alphabet is not None:
                    motif_seq = Seq(motif, alphabet)
                    if _verify_alphabet(motif_seq):
                        all_motifs = self._add_motif(all_motifs, motif)

                # if we are not being strict, just add the motif
                else:
                    all_motifs = self._add_motif(all_motifs, motif)

        return all_motifs
Beispiel #6
0
    def _get_motif_dict(self, seq_records, motif_size):
        """Return a dictionary with information on motifs.

        This internal function essentially does all of the hard work for
        finding motifs, and returns a dictionary containing the found motifs
        and their counts. This is internal so it can be reused by
        find_motif_differences.
        """
        if self.alphabet_strict:
            alphabet = seq_records[0].seq.alphabet
        else:
            alphabet = None

        # loop through all records to find the motifs in the sequences
        all_motifs = {}
        for seq_record in seq_records:
            # if we are working with alphabets, make sure we are consistent
            if alphabet is not None:
                assert seq_record.seq.alphabet == alphabet, \
                       "Working with alphabet %s and got %s" % \
                       (alphabet, seq_record.seq.alphabet)

            # now start finding motifs in the sequence
            for start in range(len(seq_record.seq) - (motif_size - 1)):
                motif = str(seq_record.seq[start:start + motif_size])

                # if we are being alphabet strict, make sure the motif
                # falls within the specified alphabet
                if alphabet is not None:
                    motif_seq = Seq(motif, alphabet)
                    if _verify_alphabet(motif_seq):
                        all_motifs = self._add_motif(all_motifs, motif)

                # if we are not being strict, just add the motif
                else:
                    all_motifs = self._add_motif(all_motifs, motif)

        return all_motifs