def read(handle, format):
    """Read one motif from a Jaspar ``.pfm`` or ``.sites`` handle.

    Arguments:
     - handle - iterator over text lines (for example an open file).
     - format - "pfm" to read a Jaspar .pfm count matrix, or "sites" to
       read a Jaspar .sites file of instances.

    Returns the Motif that was built, with ``mask`` set to one "*" per
    motif position (``"*" * motif.length``).

    Raises ValueError if ``format`` is neither "pfm" nor "sites", or if a
    ">" header line in a .sites file is not followed by a sequence line.
    """
    # Reject an unsupported format before touching the handle.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)

    alphabet = IUPAC.unambiguous_dna

    if format == "pfm":
        counts = {}
        # Rows are assigned to A, C, G, T in that order; zip stops after
        # four rows, so any further lines are left unread.
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # If there is a letter in the beginning, ignore it.
            if words[0] == letter:
                words = words[1:]
            # A real list (not a lazy map object) so the counts can be
            # indexed and measured on Python 3 as well as Python 2.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    else:
        instances = []
        for line in handle:
            # Entries are a ">..." header followed by a sequence line;
            # the first non-header line ends the list of instances.
            if not line.startswith(">"):
                break
            # next() with a default works on Python 2.6+ and 3, and lets a
            # truncated file be reported instead of leaking StopIteration.
            line = next(handle, None)
            if line is None:
                raise ValueError("Missing sequence line after header in sites file")
            # Keep every character that equals its own upper-case form;
            # this drops lower-case letters and keeps everything else.
            site = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(site, alphabet))
        motif = Motif(alphabet, instances=instances)

    motif.mask = "*" * motif.length
    return motif
def read(handle):
    """Parse motif-finder output from ``handle`` into a Record.

    ``handle`` must be an iterator over text lines (for example an open
    file).  The first line is stored as ``record.version`` and the second
    as ``record.command``.  Every following line is stripped and then
    classified, in this order:

     - empty line: skipped;
     - starts with "Para": ``record.parameters`` is reset to ``{}``;
     - starts with "#": the second tab-separated field is appended to
       ``record.sequences``;
     - contains "=": stored as a ``name = value`` entry in
       ``record.parameters`` (split on the first "=");
     - starts with "Input": ``record.sequences`` is reset to ``[]``;
     - starts with "Motif": begins a new motif, second word is its number;
     - starts with "MAP": finishes the motif, last word is its score, and
       the motif is appended to the record;
     - has exactly four tab-separated fields: the first field is added as
       a Seq instance of the current motif;
     - contains "*": remembered as the mask for the motif being read.

    NOTE(review): this layout looks like AlignACE output - confirm.

    Returns the populated Record.

    Raises ValueError if either header line is missing, if a line matches
    none of the patterns above, if a "Motif" line does not have "Motif" as
    its first word, or if a "MAP" or instance line appears before any
    "Motif" line.
    """
    # next() with a default works on Python 2.6+ and 3; a missing header is
    # reported as ValueError instead of leaking StopIteration to the caller.
    version_line = next(handle, None)
    command_line = next(handle, None)
    if version_line is None or command_line is None:
        raise ValueError("Expected a version line and a command line")

    record = Record()
    record.version = version_line.strip()
    record.command = command_line.strip()

    alphabet = IUPAC.unambiguous_dna
    # State of the motif currently being read.  None means "no Motif header
    # seen yet" (instances, number) or "no mask line seen yet" (mask).
    instances = None
    number = None
    mask = None

    for raw_line in handle:
        line = raw_line.strip()
        if not line:
            continue
        fields = line.split("\t")
        if line.startswith("Para"):
            record.parameters = {}
        elif line[0] == "#":
            record.sequences.append(fields[1])
        elif "=" in line:
            # Split once so a value that itself contains "=" is kept whole.
            par_name, par_value = line.split("=", 1)
            record.parameters[par_name.strip()] = par_value.strip()
        elif line.startswith("Input"):
            record.sequences = []
        elif line.startswith("Motif"):
            words = line.split()
            # Explicit check rather than assert, which is removed under -O.
            if words[0] != "Motif":
                raise ValueError(line)
            number = int(words[1])
            instances = []
        elif line.startswith("MAP"):
            if instances is None:
                raise ValueError(line)
            motif = Motif(alphabet, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.mask = mask
            record.append(motif)
        elif len(fields) == 4:
            if instances is None:
                raise ValueError(line)
            instances.append(Seq(fields[0], alphabet))
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record