def print_meme_header(alph):
    """Write a MEME version 4 header for alphabet `alph` to standard output.

    The header consists of, in order:
      * the "MEME version 4" banner;
      * the alphabet, either as a compact "ALPHABET= <symbols>" line when
        `alph` compares equal to the predefined "DNA" or "Protein" alphabet,
        or as an "ALPHABET <name> ... END ALPHABET" section built from
        `alph.getName()` (JSON-quoted) and `alph.asText()`;
      * a "strands: + -" line when `alph.isComplementable()` is true;
      * a uniform background: every symbol of `alph.getSymbols()` paired
        with the frequency 1 / alph.getLen(), formatted to four decimals.

    Nothing is returned; all output goes through sys.stdout.write.
    """
    emit = sys.stdout.write
    emit("\nMEME version 4\n\n")

    # any() stops at the first match, so "Protein" is only looked up when
    # the alphabet is not the predefined DNA one.
    is_predefined = any(
        alph == alphabet.getByName(builtin_name)
        for builtin_name in ("DNA", "Protein")
    )
    if not is_predefined:
        # Custom alphabets are written out in full.
        emit("ALPHABET {}\n".format(json.dumps(alph.getName())))
        emit(alph.asText())
        emit("END ALPHABET\n\n")
    else:
        emit("ALPHABET= {}\n\n".format("".join(alph.getSymbols())))

    if alph.isComplementable():
        emit("strands: + -\n\n")

    emit("Background letter frequencies (from uniform background):\n")
    uniform = 1.0 / alph.getLen()
    for letter in alph.getSymbols():
        # Each entry keeps its trailing space, including the last one.
        emit("{:s} {:.4f} ".format(letter, uniform))
    emit("\n")
Ejemplo n.º 2
0
def print_meme_header(alph):
    """Print the header of a MEME version 4 file for `alph` on stdout.

    Output, in order: the version banner; the alphabet definition (the
    short "ALPHABET= <symbols>" form when `alph` equals the predefined
    "DNA" or "Protein" alphabet, otherwise a full
    "ALPHABET <json name>" / `alph.asText()` / "END ALPHABET" section);
    a "strands: + -" line if `alph.isComplementable()`; and a uniform
    background frequency (1 / alph.getLen()) for every symbol.

    Returns None.
    """
    stream_write = sys.stdout.write
    stream_write("\nMEME version 4\n\n")

    # Look up "Protein" only when the DNA comparison did not match.
    standard = alph == alphabet.getByName("DNA")
    if not standard:
        standard = alph == alphabet.getByName("Protein")

    if standard:
        compact = "".join(alph.getSymbols())
        stream_write("ALPHABET= {}\n\n".format(compact))
    else:
        quoted_name = json.dumps(alph.getName())
        stream_write("ALPHABET {}\n".format(quoted_name))
        stream_write(alph.asText())
        stream_write("END ALPHABET\n\n")

    if alph.isComplementable():
        stream_write("strands: + -\n\n")

    stream_write("Background letter frequencies (from uniform background):\n")
    share = 1.0 / alph.getLen()
    for symbol in alph.getSymbols():
        entry = "{:s} {:.4f} ".format(symbol, share)
        stream_write(entry)
    stream_write("\n")
Ejemplo n.º 3
0
 def __init__(self, sequence, alpha=None, name="", seqinfo=""):
     """Create a sequence from symbol data and check it against an alphabet.

     Args:
         sequence: the symbols, given as a string, tuple or list. They are
             stored on ``self.data`` as a tuple.
         alpha: optional alphabet. ``None`` asks ``alphabet.getBySeq`` to
             pick one from the symbols; a string is looked up with
             ``alphabet.getByName``; an ``alphabet.Alphabet`` instance is
             used as given.
         name: sequence name, stored on ``self.name``.
         seqinfo: additional information, stored on ``self.info``.

     Raises:
         RuntimeError: if ``sequence`` has an unsupported type, if no
             alphabet can be identified or found by name, if ``alpha`` has
             an unexpected type, or if the alphabet rejects the sequence.

     Example:
     >>> myseq = sequence.Sequence('MVSAKKVPAIAMSFGVSF')
     will create a sequence with name "", and assign one of the predefined alphabets on basis of what symbols were used.
     >>> myseq.getAlphabet().getSymbols()
     will most likely output the standard protein alphabet:
     ('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
     """
     self.name = name
     self.info = seqinfo

     # isinstance (rather than an exact type() match) also accepts
     # subclasses; tuple() returns an exact tuple unchanged.
     if not isinstance(sequence, (str, tuple, list)):
         raise RuntimeError(
             "Sequence data is not specified correctly: "
             "must be string, tuple or list"
         )
     self.data = tuple(sequence)

     # Resolve choice of alphabet. Identity tests against None are used so
     # the outcome never depends on how an alphabet implements __eq__.
     if alpha is None:
         # Alphabet is not set, attempt to set it automatically...
         alpha = alphabet.getBySeq(self.data)
         if alpha is None:
             raise RuntimeError("Could not identify alphabet from sequence")
     else:
         # basestring: this module targets Python 2.
         if isinstance(alpha, basestring):
             alphaname = alpha
             alpha = alphabet.getByName(alphaname)
             if alpha is None:
                 raise RuntimeError("No predefined alphabet with name \"" +
                                    alphaname + "\"")
         elif not isinstance(alpha, alphabet.Alphabet):
             raise RuntimeError("Unexpected type for alpha")
         # An explicitly requested alphabet must accept every symbol.
         if not alpha.isValidString(self.data):
             raise RuntimeError("Invalid alphabet specified: " +
                                "".join(alpha.getSymbols()) +
                                " is not compatible with sequence '" +
                                "".join(self.data) + "'")
     # NOTE(review): the resolved alphabet is not stored on the instance
     # anywhere in this method, although the docstring example relies on
     # getAlphabet() - confirm where it is meant to be assigned.
Ejemplo n.º 4
0
 def __init__(self, sequence, alpha=None, name="", seqinfo=""):
     """Create a sequence from symbol data and check it against an alphabet.

     Args:
         sequence: the symbols, given as a string, tuple or list. They are
             stored on ``self.data`` as a tuple.
         alpha: optional alphabet. ``None`` asks ``alphabet.getBySeq`` to
             pick one from the symbols; a string is looked up with
             ``alphabet.getByName``; an ``alphabet.Alphabet`` instance is
             used as given.
         name: sequence name, stored on ``self.name``.
         seqinfo: additional information, stored on ``self.info``.

     Raises:
         RuntimeError: if ``sequence`` has an unsupported type, if no
             alphabet can be identified or found by name, if ``alpha`` has
             an unexpected type, or if the alphabet rejects the sequence.

     Example:
     >>> myseq = sequence.Sequence('MVSAKKVPAIAMSFGVSF')
     will create a sequence with name "", and assign one of the predefined alphabets on basis of what symbols were used.
     >>> myseq.getAlphabet().getSymbols()
     will most likely output the standard protein alphabet:
     ('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
     """
     self.name = name
     self.info = seqinfo

     # isinstance (rather than an exact type() match) also accepts
     # subclasses; tuple() returns an exact tuple unchanged.
     if isinstance(sequence, (str, tuple, list)):
         self.data = tuple(sequence)
     else:
         raise RuntimeError("Sequence data is not specified correctly: must be string, tuple or list")

     # Resolve choice of alphabet. Identity tests against None are used so
     # the outcome never depends on how an alphabet implements __eq__.
     if alpha is None:
         # Alphabet is not set, attempt to set it automatically...
         alpha = alphabet.getBySeq(self.data)
         if alpha is None:
             raise RuntimeError("Could not identify alphabet from sequence")
         # An automatically chosen alphabet is not re-validated.
         # NOTE(review): the resolved alphabet is not stored on the
         # instance in this method - confirm where it is assigned.
         return

     # basestring: this module targets Python 2.
     if isinstance(alpha, basestring):
         alphaname = alpha
         alpha = alphabet.getByName(alphaname)
         if alpha is None:
             raise RuntimeError("No predefined alphabet with name \"" + alphaname + "\"")
     elif not isinstance(alpha, alphabet.Alphabet):
         raise RuntimeError("Unexpected type for alpha")

     # An explicitly requested alphabet must accept every symbol.
     if not alpha.isValidString(self.data):
         raise RuntimeError("Invalid alphabet specified: " + "".join(alpha.getSymbols()) + " is not compatible with sequence '" + "".join(self.data) + "'")
     # NOTE(review): the resolved alphabet is not stored on the instance
     # anywhere in this method, although the docstring example relies on
     # getAlphabet() - confirm where it is meant to be assigned.
Ejemplo n.º 5
0
def convert_ambigs(strings, alph):
    """Translate every string in `strings` with the alphabet's translator.

    The translation table comes from `alph.translator(False)`; per the
    original description it maps aliases to their prime symbol and
    ambiguous symbols to the wildcard. Each element of `strings` is
    replaced in place, and the same (mutated) container is returned.
    """
    table = alph.translator(False)
    # Assigning by index keeps the caller's list object and updates it
    # element by element.
    for position, text in enumerate(strings):
        strings[position] = text.translate(table)
    return strings


#------------------ Main method -------------------
# Executed if you run this file from the operating system prompt, e.g.
# > python sequence.py

# Ad-hoc demo/smoke test: build a PWM for the DNA alphabet from an alignment
# and print it. Uses Python 2 print statements.
if __name__=='__main__':
    alpha = alphabet.getByName('DNA')
    # Reading sequences from FASTA is disabled; with the empty list below the
    # per-sequence loop at the bottom never runs.
    #seqs = readFASTA('pos.fasta')
    seqs = []
    # presumably 'tmp0' is a file of aligned strings - verify against
    # readStrings, which is defined outside this section
    aln = readStrings('tmp0')
    # NOTE(review): the loop below still calls regexp.match(), but this
    # definition is commented out - it would raise NameError as soon as
    # seqs is non-empty.
    #regexp = RegExp(alpha, '[AG]G.[DE]TT[AS].')
    pwm = PWM(alpha)
    pwm.setFromAlignment(aln)
    # Print the PWM one row at a time.
    for row in pwm.pretty():
        print row
    for s in seqs:
        # Name, length and alphabet symbols of each sequence.
        print s.getName(), s.getLen(), s.getAlphabet().getSymbols()
        # Each match is indexed as m[0], m[1], m[2], printed as position,
        # pattern and a two-decimal float.
        for m in regexp.match( s ):
            print "pos: %d pat: %s %4.2f" % (m[0], m[1], m[2])
        for m in pwm.match( s ):
            print "pos: %d pat: %s %4.2f" % (m[0], m[1], m[2])
Ejemplo n.º 6
0
def convert_ambigs(strings, alph):
    """Apply the alphabet's translation table to each string, in place.

    `alph.translator(False)` supplies the table; per the original
    description it converts aliases to the prime symbol and ambiguous
    symbols to the wildcard. Every slot of `strings` is overwritten with
    its translated value and `strings` itself is returned.
    """
    mapping = alph.translator(False)
    for slot, original in enumerate(strings):
        translated = original.translate(mapping)
        strings[slot] = translated
    return strings


#------------------ Main method -------------------
# Executed if you run this file from the operating system prompt, e.g.
# > python sequence.py

# Ad-hoc demo/smoke test: build a PWM for the DNA alphabet from an alignment
# and print it. Uses Python 2 print statements.
if __name__ == '__main__':
    alpha = alphabet.getByName('DNA')
    # Reading sequences from FASTA is disabled; with the empty list below the
    # per-sequence loop at the bottom never runs.
    #seqs = readFASTA('pos.fasta')
    seqs = []
    # presumably 'tmp0' is a file of aligned strings - verify against
    # readStrings, which is defined outside this section
    aln = readStrings('tmp0')
    # NOTE(review): the loop below still calls regexp.match(), but this
    # definition is commented out - it would raise NameError as soon as
    # seqs is non-empty.
    #regexp = RegExp(alpha, '[AG]G.[DE]TT[AS].')
    pwm = PWM(alpha)
    pwm.setFromAlignment(aln)
    # Print the PWM one row at a time.
    for row in pwm.pretty():
        print row
    for s in seqs:
        # Name, length and alphabet symbols of each sequence.
        print s.getName(), s.getLen(), s.getAlphabet().getSymbols()
        # Each match is indexed as m[0], m[1], m[2], printed as position,
        # pattern and a two-decimal float.
        for m in regexp.match(s):
            print "pos: %d pat: %s %4.2f" % (m[0], m[1], m[2])
        for m in pwm.match(s):
            print "pos: %d pat: %s %4.2f" % (m[0], m[1], m[2])