Esempio n. 1
0
 def block2pssm(self, block_data, seq):
     """Build a ``PSSM`` from per-position score rows.

     For every index ``i`` in ``range(len(block_data))`` a dict is created
     that maps each letter ``a`` of ``self.alphabet`` to
     ``block_data[i][self.order[a]]``. That dict is paired with ``seq[i]``
     and the resulting list of ``(seq[i], scores)`` tuples is handed to
     ``PSSM``.

     Parameters
     ----------
     block_data
         Sized, integer-indexable collection of score rows; each row is
         indexed with the values stored in ``self.order``.
         (presumably one row per sequence position -- confirm with caller)
     seq
         Integer-indexable object read at the same indices as
         ``block_data``; it must provide at least ``len(block_data)``
         items or the lookup raises.

     Returns
     -------
     The object returned by ``PSSM(...)`` for the collected tuples.
     """
     columns = []
     for position in range(len(block_data)):
         # Scores for this position, keyed by alphabet letter; self.order
         # translates a letter into its index within the score row.
         letter_scores = {
             letter: block_data[position][self.order[letter]]
             for letter in self.alphabet
         }
         columns.append((seq[position], letter_scores))
     return PSSM(columns)
Esempio n. 2
0
    def from_sequences(
        cls,
        sequences,
        name="unnamed",
        pseudocounts="jaspar",
        threshold=None,
        relative_threshold=None,
    ):
        """Build a PSSM pattern from a set of equally long sequences.

        The sequences are wrapped in ``Seq`` objects and turned into a motif
        with ``motifs.create``; pseudocounts are then applied through
        ``cls.apply_pseudocounts`` and the motif's ``pssm`` is wrapped in a
        named ``PSSM`` that backs the returned pattern.

        Parameters
        ----------
        sequences
            Iterable of sequences, all of the same length.

        name
            Label stored on the PSSM (shown in reports and similar output).

        pseudocounts
            Forwarded unchanged to ``cls.apply_pseudocounts``. Either a dict
            such as ``{"A": 0.01, "T": ...}``, the string ``"jaspar"`` for
            automatic pseudocounts from the Biopython.motifs.jaspar module
            (recommended), or ``None`` for no pseudocounts (not
            recommended).

        threshold
            Sequence locations whose PSSM score is above this value are
            considered matches. ``relative_threshold`` may be supplied
            instead for convenience.

        relative_threshold
            Number between 0 and 1 from which the threshold is derived
            automatically: 0 means "match everything", 1 means "match only
            the sequence(s) reaching the highest possible score".

        Returns
        -------
        A ``MotifPssmPattern`` built from the PSSM and the two threshold
        arguments.
        """
        # NOTE(review): the first parameter is ``cls``, so this is presumably
        # decorated with @classmethod outside the visible span -- confirm.
        motif = motifs.create([Seq(sequence) for sequence in sequences])
        cls.apply_pseudocounts(motif, pseudocounts)

        scoring_matrix = PSSM(motif.pssm)
        scoring_matrix.name = name

        # The result is always a MotifPssmPattern, never ``cls(...)``.
        pattern_kwargs = {
            "pssm": scoring_matrix,
            "threshold": threshold,
            "relative_threshold": relative_threshold,
        }
        return MotifPssmPattern(**pattern_kwargs)
def parse_pssm(fname):
    """Parse a whitespace-delimited score table from ``fname`` into a ``PSSM``.

    Each line is split on whitespace. Blank lines, and lines whose first
    field does not begin with a digit, are ignored. For every remaining line
    the second field is taken as the row label and fields 3 through 22 (at
    most twenty values) are converted to ``int`` and paired, in order, with
    the entries of the module-level ``AA``.

    NOTE(review): the layout looks like a PSI-BLAST ASCII PSSM file and
    ``AA`` is presumably the 20 amino-acid letters in column order --
    confirm against the module that defines ``AA``.

    Parameters
    ----------
    fname
        Path of the file to read; opened in text mode with the platform
        default encoding.

    Returns
    -------
    The object returned by ``PSSM(...)`` for the list of
    ``(label, {letter: score})`` tuples, in file order.

    Raises
    ------
    IndexError
        If a digit-led line has no second field.
    ValueError
        If one of the score fields is not an integer literal.
    """
    rows = []
    with open(fname) as handle:
        for raw_line in handle:
            fields = raw_line.rstrip('\r\n').split()
            # Keep only data rows: non-empty and led by a position number.
            if not fields or not re.match(r'\d+', fields[0]):
                continue
            label = fields[1]
            # zip stops at the shorter input, so short rows yield fewer keys.
            scores = {
                letter: int(value)
                for letter, value in zip(AA, fields[2:22])
            }
            rows.append((label, scores))
    return PSSM(rows)