Beispiel #1
0
def _read_motif_seq(block):
    """Build a WeightedMotif from one block of MotifFile lines.

    ``block[0][0]`` is the header line.  It is split on single spaces: the
    first field is used as the motif name, the last field as the motif
    length, and the fifth- and third-from-last fields are converted to
    floats and handed to ``WeightedMotif`` as its third and fourth
    arguments.

    ``block[1]`` is an iterable of tab-separated instance lines: the first
    column is the sequence name, the second the start position and the last
    the instance sequence.  Spaces inside the name and start columns are
    removed.

    Returns the WeightedMotif with ``name`` and ``length`` set.
    Raises ValueError when the motif length, a start position or one of the
    float header fields cannot be parsed.
    """
    gapped = Gapped(ExtendedIUPACDNA(), '-')
    head = block[0][0].rstrip("\n").split(" ")
    motif_name = head[0]
    # Parse the length once so the motif and every instance carry the same
    # integer value (previously the motif kept the raw header string).
    motif_length = int(head[-1])

    instances = []
    for line in block[1]:
        fields = line.rstrip("\n").split("\t")
        instance = meme.Instance(fields[-1], gapped)

        instance.motif_name = motif_name
        # str.replace drops the spaces on both Python 2 and Python 3;
        # the two-argument str.translate(None, " ") exists only on Python 2.
        instance.sequence_name = fields[0].replace(" ", "")
        instance.start = int(fields[1].replace(" ", ""))
        instance.length = motif_length

        instances.append(instance)

    instances = motifs.Instances(instances, alphabet=gapped)

    motif = WeightedMotif(gapped, instances, float(head[-5]), float(head[-3]))
    motif.length = motif_length
    motif.name = motif_name

    return motif
Beispiel #2
0
def _read_sites(handle):
    """Parse a JASPAR .sites stream into a Record holding one Motif (PRIVATE).

    Lines are consumed in pairs: a header starting with ">" followed by a
    sequence line.  Reading stops at the first line in header position that
    does not start with ">".  From each sequence line only the characters
    that are unchanged by ``upper()`` are kept.
    """
    alphabet = dna
    site_seqs = []

    for header in handle:
        if not header.startswith(">"):
            break
        # The line right after each ">..." header carries the sequence.
        raw = next(handle).strip()
        kept = "".join(ch for ch in raw if ch == ch.upper())
        site_seqs.append(Seq(kept, alphabet))

    instances = motifs.Instances(site_seqs, alphabet)
    motif = Motif(
        matrix_id=None,
        name=None,
        alphabet=alphabet,
        instances=instances,
    )
    motif.mask = "*" * motif.length

    record = Record()
    record.append(motif)
    return record
Beispiel #3
0
def convert(motifs_list, alphabet):
    """
    Return a new list of motifs rebuilt over the given alphabet.

    Every instance of every input motif is recreated as a ``meme.Instance``
    using *alphabet*, copying its motif name, sequence name, start and
    length.  Each resulting ``meme.Motif`` keeps the name and evalue of the
    motif it was built from.  The input list is not modified.
    """

    def _rebuild(old):
        # Recreate one instance over the target alphabet, copying metadata.
        fresh = meme.Instance(old.tostring(), alphabet)
        fresh.motif_name = old.motif_name
        fresh.sequence_name = old.sequence_name
        fresh.start = int(old.start)
        fresh.length = int(old.length)
        return fresh

    converted = []
    for source in motifs_list:
        rebuilt = motifs.Instances(
            [_rebuild(old) for old in source.instances], alphabet
        )

        target = meme.Motif(alphabet=alphabet, instances=rebuilt)
        target.name = source.name
        target.evalue = source.evalue
        converted.append(target)

    return converted
Beispiel #4
0
def __read_motifs(record, xml_tree, sequence_id_name_map):
    """Append one Motif per ``motifs/motif`` element of *xml_tree* to *record*.

    For each motif element, every ``contributing_sites/contributing_site``
    child becomes an Instance whose sequence is the concatenation of the
    ``letter_id`` attributes under its ``site`` element.  Sequence names are
    looked up in *sequence_id_name_map* by the site's ``sequence_id``.
    """
    for motif_node in xml_tree.find("motifs").findall("motif"):
        motif_name = motif_node.get("name")
        site_nodes = motif_node.find("contributing_sites").findall(
            "contributing_site"
        )

        sites = []
        for site_node in site_nodes:
            letter_refs = site_node.find("site").findall("letter_ref")
            sequence = "".join(ref.get("letter_id") for ref in letter_refs)
            seq_id = site_node.get("sequence_id")

            site = Instance(sequence)
            site.motif_name = motif_name
            site.sequence_id = seq_id
            site.sequence_name = sequence_id_name_map[seq_id]
            # TODO - left flank, right flank
            # The "position" attribute is shifted by one for the start.
            site.start = int(site_node.get("position")) + 1
            site.pvalue = float(site_node.get("pvalue"))
            site.strand = __convert_strand(site_node.get("strand"))
            site.length = len(sequence)
            sites.append(site)

        alphabet = record.alphabet
        motif = Motif(alphabet, motifs.Instances(sites, alphabet))
        motif.id = motif_node.get("id")
        motif.name = motif_name
        motif.alt_id = motif_node.get("alt")
        motif.length = int(motif_node.get("width"))
        motif.num_occurrences = int(motif_node.get("sites"))
        motif.evalue = float(motif_node.get("e_value"))
        # TODO - ic, re, llr, pvalue, bayes_threshold, elapsed_time
        record.append(motif)
0
def get_random_instances(records, motiflength):
    """
    Draw one random gapped motif instance from every record.

    A single gap size is drawn for the whole call and stored in the
    module-level ``gapSize``.  For each record a random start position and a
    random gap position inside the motif are drawn; the instance is the
    ``motiflength`` letters obtained by skipping ``gapSize`` letters at the
    gap position.  The gap position chosen for record ``idx`` is written to
    the module-level ``gapList[idx]``.

    :param records: iterable of records exposing a ``seq`` attribute
    :param motiflength: number of letters in each returned instance
    :return: ``motifs.Instances`` holding one instance per record
    :raises ValueError: if a sequence is shorter than ``motiflength`` plus
        the drawn gap size
    """
    # get a random gapsize to start, the gap will be refined after multiple
    # iterations of the algorithm.  random.randint includes BOTH end points,
    # so the upper bound is max_gapsize itself (not max_gapsize + 1).
    global gapSize
    gapSize = random.randint(0, Config.max_gapsize)
    span = motiflength + gapSize

    instances = motifs.Instances()
    for idx, record in enumerate(records):
        last_start = len(record.seq) - span
        if last_start < 0:
            raise ValueError(
                "Sequence %d has length %d, shorter than motif length plus "
                "gap size (%d)" % (idx, len(record.seq), span)
            )
        pos = random.randint(0, last_start)
        gappos = random.randint(0, motiflength)
        # Left part up to the gap, then the rest after skipping the gap.
        # When either part is empty the slice is simply empty, so no
        # special-casing is needed.
        gap_end = pos + gappos + gapSize
        seq = record.seq[pos:pos + gappos] + record.seq[gap_end:pos + span]
        instances.append(seq)
        gapList[idx] = gappos
    return instances
Beispiel #6
0
def new_motif(sites):
    """Build a Motif from sites after filtering them with ``nub_by``.

    The sites are first passed through ``listutils.nub_by`` with
    ``sequence.overlap_test``; the ``seq`` of each remaining site forms the
    instances of the underlying ``motifs.Motif``, which gets a pseudocount
    of 0.25 for each of A, C, G and T.
    """
    kept_sites = listutils.nub_by(sequence.overlap_test, sites)

    site_seqs = []
    for site in kept_sites:
        site_seqs.append(site.seq)

    inner = motifs.Motif(instances=motifs.Instances(site_seqs))
    inner.pseudocounts = {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}
    return Motif(kept_sites, inner)
Beispiel #7
0
def permute(motif):
    """Permute the given motif by shuffling its columns.

    One random column order is drawn and applied to every site sequence of
    *motif*, so all sites are rearranged the same way.  The result is a new
    Motif wrapping the shuffled sequences, with no sites attached (``None``)
    and with the pseudocounts of the input motif.
    """
    # random.shuffle works in place and needs a mutable sequence; on
    # Python 3 a bare range() is immutable, so materialise it as a list.
    cols = list(range(length(motif)))
    random.shuffle(cols)
    shuffled = [''.join(site[i] for i in cols) for site in seqs(motif)]
    _motif = motifs.Motif(instances=motifs.Instances(shuffled))
    _motif.pseudocounts = pseudocounts(motif)
    return Motif(None, _motif)
Beispiel #8
0
def ic_at(motif, other, offset):
    """Score two motifs aligned with *other* starting at *offset* in *motif*.

    The overlap is the shorter of ``length(motif) - offset`` and
    ``length(other)``.  Site sequences of both motifs are cut down to that
    overlap, pooled into one temporary motif with a pseudocount of 0.25 per
    base, and the ``mean()`` of its PSSM is returned.
    """
    overlap = min(length(motif) - offset, length(other))
    stop = offset + overlap

    # Pool the overlapping slices of both motifs into a single site list.
    pooled = [site[offset:stop] for site in seqs(motif)]
    pooled.extend([site[:overlap] for site in seqs(other)])

    combined = motifs.Motif(instances=motifs.Instances(pooled))
    combined.pseudocounts = {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}
    return combined.pssm.mean()
Beispiel #9
0
def __read_motif_sequences(handle, motif_name, alphabet, length, revcomp):
    """Read the table of motif sites from *handle* and return the Instances.

    The next three lines of *handle* must start with '---', 'Sequence name'
    and '---' in that order.  Every following line up to the next line
    starting with '---' is split on whitespace into one Instance: sequence
    name, start, p-value and (fifth column) the site sequence.  When
    *revcomp* is true the second column is removed first and used as the
    strand; otherwise the strand is '+'.

    Raises ValueError when the stream ends early or a header line does not
    match.  Each site sequence is asserted to have exactly *length* letters.
    """
    # (required prefix, message at end of stream, message template on mismatch)
    header_checks = (
        ('---',
         'Unexpected end of stream: Failed to find motif sequences',
         "Line does not start with '---':\n%s"),
        ('Sequence name',
         "Unexpected end of stream: Expected to find line starting with 'Sequence name'",
         "Line does not start with 'Sequence name':\n%s"),
        ('---',
         'Unexpected end of stream: Failed to find motif sequences',
         "Line does not start with '---':\n%s"),
    )
    for prefix, eof_message, mismatch_template in header_checks:
        try:
            line = next(handle)
        except StopIteration:
            raise ValueError(eof_message)
        if not line.startswith(prefix):
            raise ValueError(mismatch_template % line)

    sites = []
    terminated = False
    for row in handle:
        if row.startswith('---'):
            # Closing separator of the table.
            terminated = True
            break
        fields = row.strip().split()
        # With revcomp the strand sits in the second column; drop it so the
        # remaining columns line up with the single-strand layout.
        strand = fields.pop(1) if revcomp else '+'
        sequence = fields[4]
        assert len(sequence) == length
        site = Instance(sequence, alphabet)
        site.motif_name = motif_name
        site.sequence_name = fields[0]
        site.start = int(fields[1])
        site.pvalue = float(fields[2])
        site.strand = strand
        site.length = length
        sites.append(site)
    if not terminated:
        raise ValueError('Unexpected end of stream')
    return motifs.Instances(sites, alphabet)