예제 #1
0
def read(handle, format):
    """Read one motif from a Jaspar ``.pfm`` or ``.sites`` file.

    Args:
        handle: Iterable of text lines. For ``"sites"`` it must also be an
            iterator, because each sequence line is pulled with ``next()``.
        format: ``"pfm"`` for a count matrix with one row per nucleotide
            (A, C, G, T order), or ``"sites"`` for alternating ``>`` header
            and sequence lines.

    Returns:
        A ``Motif`` over ``IUPAC.unambiguous_dna`` whose ``mask`` is set to
        one ``"*"`` per motif position (``motif.length``).

    Raises:
        ValueError: If ``format`` is neither ``"pfm"`` nor ``"sites"``.
        StopIteration: In ``"sites"`` mode, if a header line is not followed
            by a sequence line.
    """
    # Reject an unsupported format before touching the handle or alphabet.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)

    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file
        counts = {}
        # zip() pairs the first four lines with A, C, G, T in that order.
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # If the row starts with its own letter, ignore that label.
            # The emptiness check keeps a blank row from raising IndexError.
            if words and words[0] == letter:
                words = words[1:]
            # Build a real list: on Python 3, map() would leave a one-shot
            # iterator in the dictionary.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    else:
        # reads the motif from Jaspar .sites file
        instances = []
        for line in handle:
            # Records are ">header" / sequence pairs; stop at the first line
            # that is not a header.
            if not line.startswith(">"):
                break
            # The built-in next() works on Python 2.6+ and 3, unlike
            # the Python-2-only handle.next().
            line = next(handle)
            # Keep only the characters that equal their own upper-case form.
            instance = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(instance, alphabet))
        motif = Motif(alphabet, instances=instances)

    motif.mask = "*" * motif.length
    return motif
예제 #2
0
def read(handle):
    """Parse a motif report from *handle* into a ``Record``.

    The first two lines are stored verbatim as ``record.ver`` and
    ``record.cmd_line``. Every following line is dispatched on its content:

    * blank lines are skipped;
    * a line starting with ``"Para"`` resets ``record.param_dict``;
    * a line starting with ``"#"`` appends its second tab-separated field
      to ``record.seq_dict``;
    * a line containing ``"="`` is stored as a ``name = value`` parameter;
    * a line starting with ``"Input"`` resets ``record.seq_dict`` to a list;
    * a line starting with ``"Motif"`` starts a new motif on the record;
    * a line starting with ``"MAP"`` sets the current motif's score from the
      last whitespace-separated field;
    * a line with exactly four tab-separated fields adds its first field as
      a sequence instance of the current motif;
    * a line containing ``"*"`` is used as the current motif's mask.

    NOTE(review): this looks like AlignACE output -- confirm against caller.

    Args:
        handle: Iterator over text lines.

    Returns:
        The populated ``Record``.

    Raises:
        ValueError: If a line matches none of the patterns above (the
            offending line is the exception argument).
    """
    record = Record()
    record.ver = next(handle)
    record.cmd_line = next(handle)
    for line in handle:
        if line.strip() == "":
            pass
        elif line[:4] == "Para":
            record.param_dict = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.seq_dict.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only so a value that itself contains
            # "=" is kept whole instead of being truncated.
            par_name, par_value = line.split("=", 1)
            record.param_dict[par_name.strip()] = par_value.strip()
        elif line[:5] == "Input":
            record.seq_dict = []
        elif line[:5] == "Motif":
            record.current_motif = Motif()
            record.motifs.append(record.current_motif)
            record.current_motif.alphabet = IUPAC.unambiguous_dna
        elif line[:3] == "MAP":
            record.current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            record.current_motif.add_instance(seq)
        elif "*" in line:
            # Strip only the line terminator. The previous "\n\c" was an
            # invalid escape that stripped backslashes and the letter "c"
            # while leaving a trailing "\r" in place.
            record.current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
예제 #3
0
    def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
        """Render the forward and reverse-complement logos for this k-mer set.

        Stores the basenames of both output paths on the instance, builds a
        DNA motif from every entry of ``self.kmer_lst``, and hands the motif
        and its reverse complement to ``construct_weblogo_helper``. Finally
        ``self.logowidth`` is raised to the largest of its current value and
        the two values returned by the helper (presumably logo widths --
        confirm against ``construct_weblogo_helper``).

        Args:
            weblogo_filename: Output path for the forward logo.
            weblogo_revcompl_filename: Output path for the reverse-complement
                logo.
        """
        basename = os.path.basename
        self.weblogo_basename = basename(weblogo_filename)
        self.weblogo_revcompl_basename = basename(weblogo_revcompl_filename)

        forward = Motif(alphabet=IUPAC.unambiguous_dna)
        for kmer in self.kmer_lst:
            forward.add_instance(Seq(kmer, forward.alphabet))

        # Forward strand first; the reverse complement is derived only after
        # the forward logo has been produced.
        widths = [self.construct_weblogo_helper(weblogo_filename, forward)]

        backward = forward.reverse_complement()
        widths.append(
            self.construct_weblogo_helper(weblogo_revcompl_filename, backward))

        self.logowidth = max([self.logowidth] + widths)
예제 #4
0
def read(handle):
    """Parse a motif report from *handle* into a ``Record``.

    The first two lines are stored, stripped, as ``record.version`` and
    ``record.command``. Every following line is stripped and dispatched on
    its content:

    * blank lines are skipped;
    * ``"Para..."`` resets ``record.parameters``;
    * ``"#..."`` appends the second tab-separated field to
      ``record.sequences``;
    * a line containing ``"="`` is stored as a ``name = value`` parameter;
    * ``"Input..."`` resets ``record.sequences`` to an empty list;
    * ``"Motif <n>"`` starts collecting instances for motif number ``n``;
    * a line with exactly four tab-separated fields contributes its first
      field as a sequence instance;
    * a line containing ``"*"`` is remembered as the mask;
    * ``"MAP..."`` finishes the motif: it is built from the collected
      instances, given the score from the last whitespace-separated field,
      the motif number and the mask, and appended to ``record.motifs``.

    The ``"MAP"`` branch relies on a preceding ``"Motif"`` line and mask line
    having been seen; otherwise the corresponding locals are unbound.

    NOTE(review): this looks like AlignACE output -- confirm against caller.

    Args:
        handle: Iterator over text lines.

    Returns:
        The populated ``Record``.

    Raises:
        ValueError: If a line matches none of the patterns above, or a
            ``"Motif"`` line does not have ``"Motif"`` as its first word.
    """
    record = Record()
    # Built-in next() works on Python 2.6+ and 3; handle.next() is
    # Python 2 only.
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only, so a value containing "=" does
            # not break the two-way unpacking.
            par_name, par_value = line.split("=", 1)
            record.parameters[par_name.strip()] = par_value.strip()
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            words = line.split()
            # Explicit check instead of assert, which is removed under -O.
            if words[0] != "Motif":
                raise ValueError(line)
            number = int(words[1])
            instances = []
        elif line[:3] == "MAP":
            motif = Motif(IUPAC.unambiguous_dna, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.set_mask(mask)
            record.motifs.append(motif)
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            instances.append(seq)
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record
예제 #5
0
def read(handle):
    """Parse a motif report from *handle* into a ``Record``.

    The first two lines are stored, stripped, as ``record.version`` and
    ``record.command``. Every following line is stripped and dispatched on
    its content:

    * blank lines are skipped;
    * ``"Para..."`` resets ``record.parameters``;
    * ``"#..."`` appends the second tab-separated field to
      ``record.sequences``;
    * a line containing ``"="`` is stored as a ``name = value`` parameter;
    * ``"Input..."`` resets ``record.sequences`` to an empty list;
    * ``"Motif..."`` creates a new motif over ``IUPAC.unambiguous_dna`` and
      appends it to ``record.motifs``;
    * ``"MAP..."`` sets the current motif's score from the last
      whitespace-separated field;
    * a line with exactly four tab-separated fields adds its first field as
      a sequence instance of the current motif;
    * a line containing ``"*"`` is used as the current motif's mask.

    NOTE(review): this looks like AlignACE output -- confirm against caller.

    Args:
        handle: Iterator over text lines.

    Returns:
        The populated ``Record``.

    Raises:
        ValueError: If a line matches none of the patterns above (the
            stripped line is the exception argument).
    """
    record = Record()
    # Built-in next() works on Python 2.6+ and 3; handle.next() is
    # Python 2 only.
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only, so a value containing "=" does
            # not break the two-way unpacking.
            par_name, par_value = line.split("=", 1)
            record.parameters[par_name.strip()] = par_value.strip()
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif line[:3] == "MAP":
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # Strip only line terminators. The previous "\n\c" was an
            # invalid escape that stripped backslashes and the letter "c".
            current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
예제 #6
0
 def motif(self, line):
     """Start a new motif and make it the current one.

     A fresh ``Motif`` is stored as ``self.current_motif``, appended to
     ``self.motifs`` and given the ``IUPAC.unambiguous_dna`` alphabet.
     The ``line`` argument is not used.
     """
     fresh = Motif()
     self.current_motif = fresh
     self.motifs.append(fresh)
     fresh.alphabet = IUPAC.unambiguous_dna