def read(handle, format):
    """Read one motif from a Jaspar ``.pfm`` or ``.sites`` file handle.

    Arguments:
     - handle - an iterator over text lines (for example an open file).
     - format - "pfm" to read a count matrix with one row per letter in
       the order A, C, G, T, or "sites" to read alternating ">" header
       lines and sequence lines.

    Returns a Motif over the unambiguous DNA alphabet whose mask is set to
    one "*" per motif position.

    Raises ValueError if format is neither "pfm" nor "sites".
    """
    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file
        counts = {}
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # Build a real list: on Python 3 a bare map() is a one-shot
            # iterator, which cannot be indexed or measured.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    elif format == "sites":
        # reads the motif from Jaspar .sites file
        instances = []
        for line in handle:
            # Stop at the first line that is not a ">...." header.
            if not line.startswith(">"):
                break
            # The header is followed by the actual sequence line.  The
            # next() builtin works on both Python 2.6+ and Python 3,
            # unlike the Python-2-only handle.next() method.
            line = next(handle)
            # Keep only characters that are unchanged by upper(); this
            # drops the lower-case letters of the sequence line.
            instance = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(instance, alphabet))
        motif = Motif(alphabet, instances=instances)
    else:
        raise ValueError("Unknown format %s" % format)
    motif.mask = "*" * motif.length
    return motif
def read(handle):
    """Parse a motif-finder output handle into a Record.

    The first two lines of handle are stored verbatim (not stripped) as
    record.ver and record.cmd_line.  Every following line is dispatched
    on its content:

     - blank lines are skipped;
     - a line starting with "Para" resets record.param_dict;
     - a line starting with "#" appends its second tab-separated field
       to record.seq_dict;
     - a line containing "=" stores a name/value pair in
       record.param_dict;
     - a line starting with "Input" resets record.seq_dict to a list;
     - a line starting with "Motif" starts a new motif;
     - a line starting with "MAP" sets the current motif's score from
       the last whitespace-separated word;
     - a line with exactly four tab-separated fields adds the first
       field as an instance of the current motif;
     - any other line containing "*" is the current motif's mask.

    Returns the populated Record.

    Raises ValueError (carrying the offending line) for a line that
    matches none of the above.
    """
    record = Record()
    record.ver = next(handle)
    record.cmd_line = next(handle)
    for line in handle:
        if line.strip() == "":
            pass
        elif line[:4] == "Para":
            record.param_dict = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.seq_dict.append(seq_name)
        elif "=" in line:
            parts = line.split("=")
            par_name = parts[0].strip()
            par_value = parts[1].strip()
            record.param_dict[par_name] = par_value
        elif line[:5] == "Input":
            record.seq_dict = []
        elif line[:5] == "Motif":
            record.current_motif = Motif()
            record.motifs.append(record.current_motif)
            record.current_motif.alphabet = IUPAC.unambiguous_dna
        elif line[:3] == "MAP":
            record.current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            record.current_motif.add_instance(seq)
        elif "*" in line:
            # Strip only the line terminator so that blanks inside the
            # mask survive.  The previous strip set "\n\c" held an invalid
            # escape: it removed backslashes and the letter "c" but left
            # a trailing "\r" in place.
            record.current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
    """Build logos for the k-mer motif and for its reverse complement.

    Stores the base names of both target files on the instance, builds a
    DNA motif from the k-mers in ``self.kmer_lst``, passes that motif and
    then its reverse complement to ``self.construct_weblogo_helper``, and
    finally sets ``self.logowidth`` to the maximum of its current value
    and the two values returned by the helper.
    """
    basename = os.path.basename
    self.weblogo_basename = basename(weblogo_filename)
    self.weblogo_revcompl_basename = basename(weblogo_revcompl_filename)

    # Forward-strand motif: one instance per k-mer.
    forward = Motif(alphabet=IUPAC.unambiguous_dna)
    for word in self.kmer_lst:
        forward.add_instance(Seq(word, forward.alphabet))
    width_forward = self.construct_weblogo_helper(weblogo_filename, forward)

    # Reverse complement, derived only after the forward logo is done.
    backward = forward.reverse_complement()
    width_backward = self.construct_weblogo_helper(
        weblogo_revcompl_filename, backward)

    self.logowidth = max(self.logowidth, width_forward, width_backward)
def read(handle):
    """Parse a motif-finder output handle into a Record.

    The first two lines of handle are stored, stripped, as record.version
    and record.command.  Every following line is stripped and dispatched
    on its content:

     - blank lines are skipped;
     - a line starting with "Para" resets record.parameters;
     - a line starting with "#" appends its second tab-separated field
       to record.sequences;
     - a line containing "=" stores a name/value pair in
       record.parameters;
     - a line starting with "Input" resets record.sequences;
     - a "Motif <number>" line starts collecting instances for a new
       motif;
     - a line with exactly four tab-separated fields contributes its
       first field as an instance;
     - any other line containing "*" is remembered as the mask;
     - a line starting with "MAP" finishes the motif: it is built from
       the collected instances, given its score (last word of the line),
       number and mask, and appended to record.motifs.

    Returns the populated Record.

    Raises ValueError (carrying the offending line) for a line that
    matches none of the above.
    """
    record = Record()
    # Use the next() builtin rather than the Python-2-only handle.next()
    # method so the parser also runs on Python 3.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            par_name, par_value = line.split("=")
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            words = line.split()
            # NOTE(review): this check disappears under "python -O".
            assert words[0] == "Motif"
            number = int(words[1])
            instances = []
        elif line[:3] == "MAP":
            # The MAP line closes the motif started by the last "Motif"
            # line; instances, number and mask were gathered above.
            motif = Motif(IUPAC.unambiguous_dna, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.set_mask(mask)
            record.motifs.append(motif)
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            instances.append(seq)
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record
def read(handle):
    """Parse a motif-finder output handle into a Record.

    The first two lines of handle are stored, stripped, as record.version
    and record.command.  Every following line is stripped and dispatched
    on its content:

     - blank lines are skipped;
     - a line starting with "Para" resets record.parameters;
     - a line starting with "#" appends its second tab-separated field
       to record.sequences;
     - a line containing "=" stores a name/value pair in
       record.parameters;
     - a line starting with "Input" resets record.sequences;
     - a line starting with "Motif" creates a new DNA motif, appends it
       to record.motifs and makes it the current motif;
     - a line starting with "MAP" sets the current motif's score from
       the last whitespace-separated word;
     - a line with exactly four tab-separated fields adds the first
       field as an instance of the current motif;
     - any other line containing "*" is the current motif's mask.

    Returns the populated Record.

    Raises ValueError (carrying the offending line) for a line that
    matches none of the above.
    """
    record = Record()
    # Use the next() builtin rather than the Python-2-only handle.next()
    # method so the parser also runs on Python 3.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            par_name, par_value = line.split("=")
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif line[:3] == "MAP":
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # The previous strip set "\n\c" held an invalid escape, so it
            # removed backslashes and the letter "c" from the ends of the
            # mask.  Only line terminators should be removed.
            current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
def motif(self, line):
    """Start a new motif and make it the current one.

    A fresh Motif becomes ``self.current_motif``, is appended to
    ``self.motifs`` and is given the unambiguous DNA alphabet.  The
    ``line`` argument is accepted but not used.
    """
    fresh = Motif()
    self.current_motif = fresh
    self.motifs.append(fresh)
    fresh.alphabet = IUPAC.unambiguous_dna