def unalign(self):
    """unalign() : Strip all gaps from all sequences.

    For every entry group in this container the master entry's sequence is
    replaced by its de-gapped form, after which gappify() is called on the
    group.
    """
    for group in self:
        master = group.master
        master.seq = deGappify(master.seq)
        group.gappify()
def gappify(self, force=True, gappifier=None):
    """gappify(force=True, gappifier=None) : Gappify (align) entries within the same EntryGroup.

    If a gappifier object is given, its gappify() method is applied to the
    sequence of every entry (master included) and nothing else is done.

    Otherwise the master sequence serves as the template for entries 1..n-1.
    With force==True they are always re-gappified. With force==False they are
    only re-gappified if some entry's length differs from the template's.

    Raises IllegalStateError if, with force==False, the first entry whose
    length differs from the template also differs from it in ungapped length.
    Raises ValueError if gappifying an entry yields an empty result.
    """
    # Externally supplied gappifier: apply it to everything and stop.
    if gappifier is not None:
        for member in self:
            member.seq = gappifier.gappify(member.seq)
        return

    master_seq = self.master.seq

    regap = bool(force)
    if not regap:
        # Look for the first entry whose gapped length disagrees with the
        # master; one such entry is enough to trigger a full re-gappify.
        for idx in xrange(1, len(self)):
            candidate = self[idx].seq
            if len(candidate) == len(master_seq):
                continue
            if length_ungapped(master_seq) != length_ungapped(candidate):
                raise IllegalStateError(
                    "Ungapped lengths differ between master and slave entries in same EntryGroup!"
                )
            regap = True
            break

    if not regap:
        return

    for idx in xrange(1, len(self)):
        member = self[idx]
        # NOTE: this is the module-level gappify() function, not this method.
        regapped = gappify(master_seq, deGappify(member.seq))
        if not regapped:
            raise ValueError(
                "Gappification of entry group '%s' failed!" % (member.code))
        member.seq = regapped
def align(self, degappify=False):
    """align(degappify=False) : Align all EntryGroups in a multiple sequence alignment, using the master entries.

    If degappify==True, remove_gaps() is called first so that all sequences
    are realigned from scratch.

    The FASTA form of this object is run through prosci.util.seq.align and
    parsed back. Each group's master sequence is then replaced by its aligned
    counterpart and gappify() is called on the group.
    """
    from prosci.util.seq import align as seqalign

    if degappify:
        self.remove_gaps()

    aligned_fasta = seqalign(self.toFastaString())
    realigned = Ali(aligned_fasta, fasta_mode=True)

    for own_group, new_group in zip(self, realigned):
        # The aligner may only add or move gaps, never change residues.
        assert deGappify(new_group.master.seq) == deGappify(
            own_group.master.seq)
        own_group.master.seq = new_group.master.seq
        own_group.gappify()
def add(self, entrygroups, merge_duplicates=False, replace_duplicates=False):
    """add(entrygroups, merge_duplicates=False, replace_duplicates=False) : Add one or more EntryGroups to this alignment.

    entrygroups may be a single Ali.EntryGroup or an Ali object.

    A group whose code is not yet present is appended as-is. For a group
    whose code already exists:
      - if merge_duplicates is false, ValueError is raised;
      - otherwise its non-master entries are added to the existing group
        (passing replace=replace_duplicates to the group's add()). The two
        master sequences must be identical, or identical once de-gapped; in
        the latter case gappify() is called on the existing group afterwards.
        If the masters differ even after de-gapping, ValueError is raised.

    Argument type and a non-None group code are checked with asserts.
    """
    if isinstance(entrygroups, Ali.EntryGroup):
        entrygroups = [entrygroups]
    else:
        assert isinstance(entrygroups, Ali)

    for eg in entrygroups:
        code = eg.getCode()
        assert code is not None

        if not self.has_entry(code):
            self.entrygroups.append(eg)
            continue

        if not merge_duplicates:
            raise ValueError("Duplicate Ali.EntryGroup in Ali: %s" % (code))

        oldeg = self[code]
        oldMaster = oldeg.getMasterEntry().get_seq()
        newMaster = eg.getMasterEntry().get_seq()

        # Masters must agree at least after removing gaps; validate before
        # touching the existing group so a failure leaves it unmodified.
        same_gapping = oldMaster == newMaster
        if not same_gapping and deGappify(oldMaster) != deGappify(newMaster):
            raise ValueError(
                "Master sequences not equal: %s\n%s\n%s\n" %
                (code, oldMaster, newMaster))

        for e in eg:
            if not e.isMaster():
                oldeg.add(e, replace=replace_duplicates)

        # New entries carry the incoming master's gapping; re-fit them to the
        # existing master when the two gappings differ.
        if not same_gapping:
            oldeg.gappify()
def align(self, other, degappify=False):
    """align(other, degappify=False) : Align this Entry to another using a sequence alignment.

    If degappify==True, the gaps of both entries are removed before
    realigning.

    Both entries are written out as FASTA, run through
    prosci.util.seq.align together and parsed back; the seq attribute of each
    entry is then overwritten with its aligned sequence. The two entries must
    have different codes (checked with an assert).
    """
    from prosci.util.seq import align as seqalign

    assert self.code != other.code

    if degappify:
        self.seq = deGappify(self.seq)
        other.seq = deGappify(other.seq)

    combined_fasta = self.toFastaString() + other.toFastaString()
    pairwise = Ali(seqalign(combined_fasta), fasta_mode=True)
    assert len(pairwise) == 2

    # Alignment must not alter residues, only gaps.
    own_aligned = pairwise[self.code].master.seq
    assert deGappify(own_aligned) == deGappify(self.seq)
    other_aligned = pairwise[other.code].master.seq
    assert deGappify(other_aligned) == deGappify(other.seq)

    self.seq = own_aligned
    other.seq = other_aligned
def score_tm_segments(self, nativeid, modelid, minlength=7, debug=False):
    """score_tm_segments(nativeid, modelid, minlength=7, debug=False) : Compare the TM segments of a native and a model structure.

    Reads the "membrane layer" entries of both alignment groups, derives two
    lists of TM regions from them, cuts the corresponding residue slices out
    of both structures and computes a relative tilt and rotation for every
    fragment pair. Regions whose native fragment fails isCorrectOrientation()
    are removed from all lists.

    nativeid, modelid : keys into self.alignment and self.structures.
    minlength : passed on to findTMregion() and reduceTMregion()
                (presumably a minimum segment length -- confirm).
    debug : if true, plot_fragments() is called for each kept fragment pair.

    Returns the tuple (tm_region, angles, tm_region2, shifts). angles holds
    one (tilt_angle, rotation_angle) pair per kept region; shifts is the
    result of diffTM(tm_region2, native.seq, model.seq).

    Calls self.load_structures() first if self.structures is falsy.
    Consistency checks are plain asserts.
    """
    native = self.alignment[nativeid]["membrane layer"]
    model = self.alignment[modelid]["membrane layer"]
    assert len(native.seq) == len(model.seq)

    if not self.structures:
        self.load_structures()

    tm_region = findTMregion(native.seq, minlength)
    # Element-wise copy, so the two region lists can be edited independently.
    tm_region2 = [x[:] for x in tm_region]

    # These three helpers are called for their effect on the lists passed in;
    # their return values are not used.
    reduceTMregion(tm_region, model.seq, minlength)
    extendTMregion(tm_region2, model.seq)
    syncTMregion(tm_region2, tm_region)

    # Both conversions start from tm_region (alignment columns); only the
    # sequence used for the conversion differs.
    tm_region_native = columnindex2residueindex(tm_region, native.seq)
    tm_region_model = columnindex2residueindex(tm_region, model.seq)
    assert len(tm_region_native) == len(tm_region_model)

    # Second argument is end - start, i.e. presumably a length rather than an
    # end index -- TODO confirm against get_residue_slice().
    tm_fragments_native = [
        self.structures[nativeid].get_residue_slice(start, end - start)
        for start, end in tm_region_native
    ]
    tm_fragments_model = [
        self.structures[modelid].get_residue_slice(start, end - start)
        for start, end in tm_region_model
    ]
    assert len(tm_fragments_native) == len(tm_fragments_model)
    assert len(tm_fragments_native) == len(tm_region)

    angles = []
    # Index-driven loop because entries are deleted in place: i is advanced
    # only when the current region is kept.
    i = 0
    while i < len(tm_fragments_native):
        frag_nat = tm_fragments_native[i]
        frag_mod = tm_fragments_model[i]
        alistart, aliend = tm_region[i]

        # Each fragment must match the ungapped master sequence in the same
        # alignment column range, and both fragments must have the same
        # residue count.
        assert frag_nat.get_seq() == deGappify(
            self.alignment[nativeid].getMasterEntry().seq[alistart:aliend])
        assert frag_mod.get_seq() == deGappify(
            self.alignment[modelid].getMasterEntry().seq[alistart:aliend])
        assert frag_nat.rescount() == frag_mod.rescount()

        frag_nat = TMFragment(frag_nat)
        if not frag_nat.isCorrectOrientation():
            # Drop this region from every parallel list and re-examine the
            # element that has moved into slot i.
            del tm_region[i]
            del tm_region2[i]
            del tm_region_native[i]
            del tm_region_model[i]
            del tm_fragments_native[i]
            del tm_fragments_model[i]
            continue

        frag_mod = TMFragment(frag_mod)
        tilt_angle = frag_nat.get_relative_tilt(frag_mod)
        rotation_angle = frag_nat.get_relative_rotation(frag_mod)
        angles.append((tilt_angle, rotation_angle))
        i += 1

        # Disabled debugging code:
        #frag_mod2 = frag_mod.overlay_onto(frag_nat)
        #print rad2deg(angle(frag_nat.rS, frag_mod2.rS))
        #print rad2deg(angle(frag_nat.rS, frag_mod.rS))
        if debug:
            plot_fragments(frag_nat, frag_mod)

    # Computed from tm_region2 after the filtering above.
    shifts = diffTM(tm_region2, native.seq, model.seq)
    return tm_region, angles, tm_region2, shifts
def __init__(self, seq1, seq2, gaplist='-'):
    """__init__(seq1, seq2, gaplist='-') : Build gappifiers that bring two gapped sequences onto a common gapping.

    seq1, seq2 : the two input sequences, possibly containing gaps.
    gaplist : stored on the instance and passed to the two aligned->output
              Gappifier objects (presumably the set of gap characters --
              confirm against Gappifier).

    If the de-gapped sequences differ, they are aligned to each other first
    via a temporary Ali object. The insertion lists of the two
    aligned->output gappifiers are then merged so that both produce output
    of the same number of steps, and two input->output gappifiers are
    created from the resulting final sequences.

    Sets: gaplist, seqlen1, seqlen2, gfr1_aligned2output,
    gfr2_aligned2output, gfr1_input2output, gfr2_input2output.
    """
    self.gaplist = gaplist
    self.seqlen1 = len(seq1)
    self.seqlen2 = len(seq2)

    # NOTE(review): deGappify() is called without gaplist here -- confirm it
    # strips the same characters as a non-default gaplist would.
    seq1_aligned = deGappify(seq1)
    seq2_aligned = deGappify(seq2)

    # Only run an alignment when the residues actually differ; identical
    # de-gapped sequences are already "aligned" to each other.
    if seq1_aligned != seq2_aligned:
        alignment = Ali(">seq1\nsequence\n%s\n>seq2\nsequence\n%s\n" %
                        (seq1, seq2))
        alignment.align()  # This removes all previously existing gaps
        # NOTE(review): align() is called with its defaults here; the claim
        # above depends on the aligner itself discarding gaps -- confirm.
        seq1_aligned = alignment["seq1"].master.seq
        seq2_aligned = alignment["seq2"].master.seq

    assert len(seq1_aligned) == len(seq2_aligned)

    # We now have 4 sequences:
    #   seq1 with initial gaps
    #   seq1 with new gaps (seq1_aligned)
    #   seq2 with new gaps (seq2_aligned)
    #   seq2 with initial gaps
    #
    # We want to map seq1_aligned back to seq1 but keep any gaps we've introduced.
    # We want to map seq2_aligned back to seq2 but keep any gaps we've introduced.
    #
    # We need to do all this in such a way that every change we make to one sequence, we
    # also make to the other, to keep them the same length.

    # Calculate the mapping of seq1_aligned to seq1
    self.gfr1_aligned2output = Gappifier(seq1_aligned, seq1, gaplist)

    # Calculate the mapping of seq2_aligned to seq2
    self.gfr2_aligned2output = Gappifier(seq2_aligned, seq2, gaplist)

    # Calculate merge instructions
    #
    # Two-cursor merge of both insertion lists. Every step appends exactly
    # one item to each of mergerules1 and mergerules2, so the merged lists
    # always have equal length.
    mergerules1 = []
    mergerules2 = []
    n1 = 0
    n2 = 0
    while n1 < len(self.gfr1_aligned2output.insertions) or n2 < len(
            self.gfr2_aligned2output.insertions):
        # Consume the run of negative items at cursor 1; each one becomes a
        # -1 in BOTH merged lists. Leaves `a` holding the first non-negative
        # item, if any.
        while n1 < len(self.gfr1_aligned2output.insertions):
            a = self.gfr1_aligned2output.insertions[n1]
            if a >= 0:
                break
            mergerules1.append(-1)
            mergerules2.append(-1)
            n1 += 1
        # Same for cursor 2, leaving `b` at its first non-negative item.
        while n2 < len(self.gfr2_aligned2output.insertions):
            b = self.gfr2_aligned2output.insertions[n2]
            if b >= 0:
                break
            mergerules1.append(-1)
            mergerules2.append(-1)
            n2 += 1
        # Non-negative items are consumed pairwise, so both lists are
        # expected to run out together.
        if n1 >= len(self.gfr1_aligned2output.insertions) or n2 >= len(
                self.gfr2_aligned2output.insertions):
            assert n1 >= len(
                self.gfr1_aligned2output.insertions) and n2 >= len(
                    self.gfr2_aligned2output.insertions)
            break
        # Both cursors sit on a non-negative item: emit them as a pair.
        mergerules1.append(a)
        mergerules2.append(b)
        n1 += 1
        n2 += 1

    # Replace each gappifier's own insertion list with the merged one.
    self.gfr1_aligned2output.insertions = mergerules1
    self.gfr2_aligned2output.insertions = mergerules2

    seq1_final = self.gfr1_aligned2output.gappify(seq1_aligned)
    seq2_final = self.gfr2_aligned2output.gappify(seq2_aligned)

    # The string literal below is disabled debugging output (a no-op
    # expression statement).
    """ print seq1 print seq2 print seq1_aligned print seq2_aligned print seq1_final print seq2_final """

    # NOTE(review): gaplist is not passed to these two Gappifier calls,
    # unlike the two above -- confirm this is intended.
    self.gfr1_input2output = Gappifier(seq1, seq1_final)
    self.gfr2_input2output = Gappifier(seq2, seq2_final)