def read(handle, format):
    """Read a single motif from a Jaspar file handle.

    Arguments:
     - handle - iterable of text lines (e.g. an open file).
     - format - "pfm" for a Jaspar .pfm count matrix, or "sites" for a
       Jaspar .sites file.

    For "pfm", up to four lines are consumed and paired, in order, with
    the letters A, C, G and T.  A leading token equal to the row's letter
    is dropped and the remaining tokens are converted to floats.

    For "sites", the handle is read as alternating header lines (starting
    with ">") and sequence lines.  Reading stops at the first line in
    header position that does not start with ">".  From each sequence
    line only the characters equal to their own upper-case form are kept.

    Returns the Motif, with its mask set to one "*" per motif position.

    Raises ValueError for any other format, and StopIteration if a
    "sites" header line is not followed by a sequence line.
    """
    # Reject unsupported formats up front, before anything is read.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)

    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file
        counts = {}
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # Build a real list: on Python 3 map() returns a lazy iterator
            # that can be consumed only once and has no length.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    else:
        # reads the motif from Jaspar .sites file
        instances = []
        for line in handle:
            if not line.startswith(">"):
                break
            # line contains the header ">...."
            # now read the actual sequence; the next() builtin works on
            # any iterator, whereas handle.next() exists only on Python 2.
            line = next(handle)
            kept = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(kept, alphabet))
        instances = Instances(instances, alphabet)
        motif = Motif(alphabet, instances=instances)

    motif.mask = "*" * motif.length
    return motif
def read(handle, format):
    """Read a single motif from a Jaspar file handle.

    Arguments:
     - handle - iterable of text lines (e.g. an open file).
     - format - "pfm" (Jaspar .pfm count matrix) or "sites" (Jaspar
       .sites file).

    "pfm": up to four lines are taken from the handle and matched, in
    order, with the letters A, C, G, T.  If a row starts with its own
    letter that token is skipped; every remaining token is parsed as a
    float.

    "sites": lines are consumed in pairs of a header (starting with ">")
    followed by a sequence line.  The first line in header position that
    does not start with ">" ends the parse.  Only characters that equal
    their own upper-case form are kept from each sequence line.

    Returns the resulting Motif; its mask is a run of "*" characters of
    length motif.length.

    Raises ValueError if format is neither "pfm" nor "sites", and
    StopIteration if the handle ends right after a "sites" header line.
    """
    # Fail fast on an unsupported format, before touching the handle.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)

    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file
        letters = "ACGT"
        counts = {}
        for letter, line in zip(letters, handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # list(...) keeps the counts a concrete list on Python 3,
            # where a bare map() is a single-use iterator.
            counts[letter] = list(map(float, words))
        motif = Motif(alphabet, counts=counts)
    else:
        # reads the motif from Jaspar .sites file
        instances = []
        for line in handle:
            if not line.startswith(">"):
                break
            # line contains the header ">...."
            # now read the actual sequence, using the next() builtin
            # because the .next() method is not available on Python 3.
            sequence_line = next(handle)
            text = "".join(
                c for c in sequence_line.strip() if c == c.upper()
            )
            instances.append(Seq(text, alphabet))
        instances = Instances(instances, alphabet)
        motif = Motif(alphabet, instances=instances)

    motif.mask = "*" * motif.length
    return motif
def read(handle):
    """Parse a motif report from handle and return it as a Record.

    The first two lines are stored, stripped, as record.version and
    record.command.  Every following line is stripped and handled by the
    first rule that matches:

     - empty line: ignored
     - starts with "Para": record.parameters is reset to an empty dict
     - starts with "#": the second tab-separated field is appended to
       record.sequences
     - contains "=": split into a name/value pair and stored, stripped,
       in record.parameters
     - starts with "Input": record.sequences is reset to an empty list
     - starts with "Motif": the motif number is read from the second
       word and a new list of instances is started
     - starts with "MAP": a Motif is built from the collected instances,
       given the trailing score, the current number and mask, and
       appended to the record
     - exactly four tab-separated fields: the first field is added to
       the current instances as a Seq
     - contains "*": remembered as the mask for the next motif

    Any other line raises ValueError with the line as its argument.
    """
    record = Record()
    record.version = next(handle).strip()
    record.command = next(handle).strip()

    for raw_line in handle:
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("Para"):
            record.parameters = {}
        elif line.startswith("#"):
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Plain two-way unpacking: a line with more than one "="
            # is rejected by the unpacking itself.
            key, value = line.split("=")
            key = key.strip()
            value = value.strip()
            record.parameters[key] = value
        elif line.startswith("Input"):
            record.sequences = []
        elif line.startswith("Motif"):
            tokens = line.split()
            assert tokens[0] == "Motif"
            number = int(tokens[1])
            sites = []
        elif line.startswith("MAP"):
            alphabet = IUPAC.unambiguous_dna
            # Rebind the running collection to the wrapped Instances.
            sites = Instances(sites, alphabet)
            motif = Motif(alphabet, sites)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.mask = mask
            record.append(motif)
        elif line.count("\t") == 3:
            # Three tabs means exactly four tab-separated fields.
            first_field = line.partition("\t")[0]
            sites.append(Seq(first_field, IUPAC.unambiguous_dna))
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)

    return record