Ejemplo n.º 1
0
 def unalign(self):
     """unalign() : Strip the gaps from every master sequence, then re-gappify each group.

     For each entry group yielded by iterating over self, the master entry's
     sequence is passed through deGappify() and the group's own gappify()
     is then called with its default arguments.
     """
     for group in self:
         master = group.master
         master.seq = deGappify(master.seq)
         group.gappify()
Ejemplo n.º 2
0
        def gappify(self, force=True, gappifier=None):
            """gappify(force=True, gappifier=None) : Re-gap the entries of this EntryGroup.

            If a gappifier object is given, every entry's sequence (the master
            included) is replaced by gappifier.gappify(seq) and nothing else
            happens.

            Otherwise the master sequence is used as the template for all
            entries from index 1 onwards:

            force=True  -- always rebuild those entries from the template.
            force=False -- rebuild only if some entry's length differs from the
                           template's length; raises IllegalStateError if that
                           entry's ungapped length also differs from the
                           template's ungapped length.

            Raises ValueError if the module-level gappify() returns an empty
            result for an entry.
            """
            if gappifier is not None:
                for member in self:
                    member.seq = gappifier.gappify(member.seq)
                return

            master_seq = self.master.seq
            slave_indices = xrange(1, len(self))

            regap = bool(force)
            if not regap:
                # Look for the first slave entry that is out of step with the master.
                for idx in slave_indices:
                    slave_seq = self[idx].seq
                    if len(slave_seq) == len(master_seq):
                        continue
                    if length_ungapped(master_seq) != length_ungapped(
                            slave_seq):
                        raise IllegalStateError(
                            "Ungapped lengths differ between master and slave entries in same EntryGroup!"
                        )
                    regap = True
                    break

            if not regap:
                return

            for idx in slave_indices:
                member = self[idx]
                regapped = gappify(master_seq, deGappify(member.seq))
                if not regapped:
                    raise ValueError(
                        "Gappification of entry group '%s' failed!" %
                        (member.code))
                member.seq = regapped
Ejemplo n.º 3
0
    def align(self, degappify=False):
        """align(degappify=False) : Realign the master entries of all EntryGroups.

        The alignment is serialised with toFastaString(), handed to
        prosci.util.seq.align, and the output is parsed back as a FASTA-mode
        Ali. Each group's master sequence is then replaced by the realigned
        one and gappify() is called on the group.

        If degappify==True, remove_gaps() is called before realigning.
        """
        from prosci.util.seq import align as seqalign

        if degappify:
            self.remove_gaps()

        realigned = Ali(seqalign(self.toFastaString()), fasta_mode=True)

        for own_group, new_group in zip(self, realigned):
            new_master_seq = new_group.master.seq
            # Realignment may only differ in gaps, never in the residues.
            assert deGappify(new_master_seq) == deGappify(
                own_group.master.seq)
            own_group.master.seq = new_master_seq
            own_group.gappify()
Ejemplo n.º 4
0
    def add(self,
            entrygroups,
            merge_duplicates=False,
            replace_duplicates=False):
        """add(entrygroups, merge_duplicates=False, replace_duplicates=False) : Add entry groups to this Ali.

        entrygroups        -- a single Ali.EntryGroup, or an Ali whose groups
                              are all added.
        merge_duplicates   -- what to do when has_entry() reports that a
                              group's code is already present: if False, raise
                              ValueError; if True, add the incoming group's
                              non-master entries to the existing group.
        replace_duplicates -- forwarded as `replace` to the existing group's
                              add() when merging.

        When merging, the master sequences must be equal at least after gap
        removal. If they differ only in their gaps, the existing group is
        re-gappified after the merge. Otherwise ValueError is raised and
        nothing from that group is merged.
        """
        if isinstance(entrygroups, Ali.EntryGroup):
            entrygroups = [entrygroups]
        else:
            assert isinstance(entrygroups, Ali)

        for eg in entrygroups:
            code = eg.getCode()
            assert code is not None

            if not self.has_entry(code):
                self.entrygroups.append(eg)
                continue

            if not merge_duplicates:
                raise ValueError("Duplicate Ali.EntryGroup in Ali: %s" %
                                 (code))

            oldeg = self[code]
            oldMaster = oldeg.getMasterEntry().get_seq()
            newMaster = eg.getMasterEntry().get_seq()

            same_gapping = oldMaster == newMaster
            if not same_gapping and deGappify(oldMaster) != deGappify(
                    newMaster):
                raise ValueError(
                    "Master sequences not equal: %s\n%s\n%s\n" %
                    (code, oldMaster, newMaster))

            for e in eg:
                if not e.isMaster():
                    oldeg.add(e, replace=replace_duplicates)

            if not same_gapping:
                # The new entries were gapped against a differently gapped
                # master, so bring them in line with the existing one.
                oldeg.gappify()
Ejemplo n.º 5
0
        def align(self, other, degappify=False):
            """align(other, degappify=False) : Pairwise-align this Entry with another Entry.

            Both entries are written out with toFastaString(), aligned with
            prosci.util.seq.align, and their seq attributes are overwritten
            with the aligned sequences. The two entries must have different
            codes.

            If degappify==True, gaps are stripped from both sequences before
            realigning.
            """
            from prosci.util.seq import align as seqalign
            assert self.code != other.code

            if degappify:
                self.seq = deGappify(self.seq)
                other.seq = deGappify(other.seq)

            fasta_pair = self.toFastaString() + other.toFastaString()
            aligned = Ali(seqalign(fasta_pair), fasta_mode=True)
            assert len(aligned) == 2

            # The aligner may only change gaps, never the residues themselves.
            own_aligned = aligned[self.code].master.seq
            assert deGappify(own_aligned) == deGappify(self.seq)
            other_aligned = aligned[other.code].master.seq
            assert deGappify(other_aligned) == deGappify(other.seq)

            self.seq = own_aligned
            other.seq = other_aligned
Ejemplo n.º 6
0
Archivo: tm.py Proyecto: xiongzhp/Kinks
    def score_tm_segments(self, nativeid, modelid, minlength=7, debug=False):
        """Compare the transmembrane (TM) segments of a model with those of a native structure.

        nativeid  -- key of the native entry group in self.alignment and
                     self.structures
        modelid   -- key of the model entry group in self.alignment and
                     self.structures
        minlength -- passed to findTMregion() and reduceTMregion()
                     (presumably the minimum segment length -- confirm
                     against those helpers)
        debug     -- if true, plot_fragments() is called for every fragment
                     pair that is scored

        Returns a 4-tuple (tm_region, angles, tm_region2, shifts):
          tm_region  -- list of (start, end) alignment-column ranges, one per
                        scored fragment
          angles     -- list of (tilt_angle, rotation_angle) pairs, parallel
                        to tm_region
          tm_region2 -- the second region list, passed through
                        extendTMregion() and syncTMregion()
          shifts     -- result of diffTM(tm_region2, native.seq, model.seq)

        Fragments whose native TMFragment fails isCorrectOrientation() are
        removed from all of the parallel lists and are not scored.
        """
        native = self.alignment[nativeid]["membrane layer"]
        model = self.alignment[modelid]["membrane layer"]

        assert len(native.seq) == len(model.seq)

        # Structures are loaded lazily on first use.
        if not self.structures:
            self.load_structures()

        tm_region = findTMregion(native.seq, minlength)
        # Copy each region so the two lists can be adjusted independently.
        tm_region2 = [x[:] for x in tm_region]
        # Return values are discarded, so these helpers presumably modify
        # their first argument in place -- TODO confirm.
        reduceTMregion(tm_region, model.seq, minlength)
        extendTMregion(tm_region2, model.seq)
        syncTMregion(tm_region2, tm_region)

        # Translate the alignment-column ranges into per-sequence residue ranges.
        tm_region_native = columnindex2residueindex(tm_region, native.seq)
        tm_region_model = columnindex2residueindex(tm_region, model.seq)

        assert len(tm_region_native) == len(tm_region_model)

        # get_residue_slice() is called with (start, length) rather than (start, end).
        tm_fragments_native = [
            self.structures[nativeid].get_residue_slice(start, end - start)
            for start, end in tm_region_native
        ]
        tm_fragments_model = [
            self.structures[modelid].get_residue_slice(start, end - start)
            for start, end in tm_region_model
        ]

        assert len(tm_fragments_native) == len(tm_fragments_model)
        assert len(tm_fragments_native) == len(tm_region)

        angles = []
        i = 0
        # A while loop is used because entries are deleted during iteration;
        # i only advances when the current fragment is kept.
        while i < len(tm_fragments_native):
            frag_nat = tm_fragments_native[i]
            frag_mod = tm_fragments_model[i]

            # Sanity checks: each structural fragment must match the ungapped
            # master sequence over the same alignment columns.
            alistart, aliend = tm_region[i]
            assert frag_nat.get_seq() == deGappify(
                self.alignment[nativeid].getMasterEntry().seq[alistart:aliend])
            assert frag_mod.get_seq() == deGappify(
                self.alignment[modelid].getMasterEntry().seq[alistart:aliend])
            assert frag_nat.rescount() == frag_mod.rescount()

            frag_nat = TMFragment(frag_nat)
            if not frag_nat.isCorrectOrientation():
                # Drop this fragment from every parallel list so that their
                # indices stay in step, then re-examine the same index.
                del tm_region[i]
                del tm_region2[i]
                del tm_region_native[i]
                del tm_region_model[i]
                del tm_fragments_native[i]
                del tm_fragments_model[i]
                continue

            frag_mod = TMFragment(frag_mod)
            tilt_angle = frag_nat.get_relative_tilt(frag_mod)
            rotation_angle = frag_nat.get_relative_rotation(frag_mod)
            angles.append((tilt_angle, rotation_angle))
            i += 1

            #frag_mod2  = frag_mod.overlay_onto(frag_nat)
            #print rad2deg(angle(frag_nat.rS, frag_mod2.rS))
            #print rad2deg(angle(frag_nat.rS, frag_mod.rS))
            if debug:
                plot_fragments(frag_nat, frag_mod)

        shifts = diffTM(tm_region2, native.seq, model.seq)

        return tm_region, angles, tm_region2, shifts
Ejemplo n.º 7
0
    def __init__(self, seq1, seq2, gaplist='-'):
        """Build the Gappifier mappings that bring seq1 and seq2 to a common gapped form.

        seq1, seq2 -- the two input sequences, possibly containing gaps
        gaplist    -- stored on the instance and passed to the Gappifiers that
                      map the aligned sequences back onto the inputs

        If the two sequences differ after gap removal they are first aligned
        to each other through a temporary Ali. Otherwise the degapped
        sequences are used directly as the "aligned" pair.

        Attributes set:
          gaplist, seqlen1, seqlen2 -- the gaplist argument and the input lengths
          gfr1_aligned2output, gfr2_aligned2output
              -- Gappifiers from the aligned sequences to the inputs, with
                 their insertions lists replaced by the merged rules
          gfr1_input2output, gfr2_input2output
              -- Gappifiers from the original inputs to the final sequences
        """
        self.gaplist = gaplist
        self.seqlen1 = len(seq1)
        self.seqlen2 = len(seq2)

        seq1_aligned = deGappify(seq1)
        seq2_aligned = deGappify(seq2)

        if seq1_aligned != seq2_aligned:
            alignment = Ali(">seq1\nsequence\n%s\n>seq2\nsequence\n%s\n" %
                            (seq1, seq2))
            # NOTE(review): align() is called with its default arguments here;
            # confirm that it really discards the pre-existing gaps as stated.
            alignment.align()  # This removes all previously existing gaps

            seq1_aligned = alignment["seq1"].master.seq
            seq2_aligned = alignment["seq2"].master.seq

            assert len(seq1_aligned) == len(seq2_aligned)

        # We now have 4 sequences:
        # seq1 with initial gaps
        # seq1 with new gaps (seq1_aligned)
        # seq2 with new gaps (seq2_aligned)
        # seq2 with initial gaps
        #
        # We want to map seq1_aligned back to seq1 but keep any gaps we've introduced.
        # We want to map seq2_aligned back to seq2 but keep any gaps we've introduced.
        #
        # We need to do all this in such a way that every change we make to one sequence, we
        # also make to the other, to keep them the same length.

        # Calculate the mapping of seq1_aligned to seq1
        self.gfr1_aligned2output = Gappifier(seq1_aligned, seq1, gaplist)

        # Calculate the mapping of seq2_aligned to seq2
        self.gfr2_aligned2output = Gappifier(seq2_aligned, seq2, gaplist)

        # Calculate merge instructions
        #
        # Walk both insertions lists in step. Every negative entry found in
        # either list becomes a -1 in BOTH merged lists; non-negative entries
        # are consumed pairwise, one from each list.
        # (Presumably a negative entry means "insert a gap" -- confirm against Gappifier.)
        mergerules1 = []
        mergerules2 = []
        n1 = 0
        n2 = 0
        while n1 < len(self.gfr1_aligned2output.insertions) or n2 < len(
                self.gfr2_aligned2output.insertions):
            # Consume the run of negative entries at the current position of list 1.
            while n1 < len(self.gfr1_aligned2output.insertions):
                a = self.gfr1_aligned2output.insertions[n1]
                if a >= 0:
                    break
                mergerules1.append(-1)
                mergerules2.append(-1)
                n1 += 1

            # Consume the run of negative entries at the current position of list 2.
            while n2 < len(self.gfr2_aligned2output.insertions):
                b = self.gfr2_aligned2output.insertions[n2]
                if b >= 0:
                    break
                mergerules1.append(-1)
                mergerules2.append(-1)
                n2 += 1

            # Both lists are expected to run out at the same time.
            if n1 >= len(self.gfr1_aligned2output.insertions) or n2 >= len(
                    self.gfr2_aligned2output.insertions):
                assert n1 >= len(
                    self.gfr1_aligned2output.insertions) and n2 >= len(
                        self.gfr2_aligned2output.insertions)
                break

            # a and b are the non-negative entries the inner loops stopped on.
            mergerules1.append(a)
            mergerules2.append(b)
            n1 += 1
            n2 += 1

        # Both Gappifiers now carry merged rule lists of equal length.
        self.gfr1_aligned2output.insertions = mergerules1
        self.gfr2_aligned2output.insertions = mergerules2

        seq1_final = self.gfr1_aligned2output.gappify(seq1_aligned)
        seq2_final = self.gfr2_aligned2output.gappify(seq2_aligned)
        """
    print seq1
    print seq2
    print seq1_aligned
    print seq2_aligned
    print seq1_final
    print seq2_final
    """

        # Direct mappings from the original inputs to the final sequences.
        self.gfr1_input2output = Gappifier(seq1, seq1_final)
        self.gfr2_input2output = Gappifier(seq2, seq2_final)