def consensusMultipleAlign(seqs, threshold, simMatrix):
    """Build a multiple alignment progressively and print it.

    The first two sequences are aligned pairwise with ``sequenceAlign``.
    Every later sequence is aligned against ``consensus(aligned, threshold)``
    of the rows collected so far; wherever that alignment places a '-' in
    the consensus, a '-' is inserted at the same column in every existing
    row before the newly aligned sequence is appended.

    Args:
        seqs: Indexable collection of sequences (strings).
        threshold: Passed unchanged to ``consensus``.
        simMatrix: Passed unchanged to ``sequenceAlign``.

    Returns:
        None. Each aligned row is printed together with its index. With
        fewer than two input sequences nothing is aligned or printed.
    """
    total = len(seqs)
    aligned = []
    first = 0

    for idx in range(first + 1, total):
        incoming = seqs[idx]

        if aligned:
            # Later rounds: align the newcomer against the current consensus.
            reference = consensus(aligned, threshold)
            _score, refAligned, newAligned = sequenceAlign(reference, incoming, simMatrix)

            # Columns where the consensus itself had to receive a gap.
            gapColumns = [col for col, symbol in enumerate(refAligned) if symbol == '-']

            # Columns are ascending and expressed in aligned coordinates, so
            # inserting them in order keeps every later index valid.
            for row, member in enumerate(aligned):
                for col in gapColumns:
                    member = member[:col] + '-' + member[col:]
                aligned[row] = member

            aligned.append(newAligned)
        else:
            # First round: a plain pairwise alignment seeds the result.
            _score, seedA, seedB = sequenceAlign(seqs[first], incoming, simMatrix)
            aligned.append(seedA)
            aligned.append(seedB)

    for row, member in enumerate(aligned):
        print(row, member)
def consensusMultipleAlign(seqs, threshold, simMatrix):
    """Progressively align ``seqs`` via a running consensus and print the rows.

    Sequence 0 and sequence 1 are aligned pairwise first. Each following
    sequence is aligned to the consensus of the rows gathered so far, the
    gaps that alignment introduces into the consensus are copied into every
    existing row, and the newly aligned sequence is added as a new row.

    Args:
        seqs: Indexable collection of sequences (strings).
        threshold: Forwarded to ``consensus``.
        simMatrix: Forwarded to ``sequenceAlign``.

    Returns:
        None. The aligned rows are printed with their row index. Nothing is
        printed when fewer than two sequences are supplied.
    """
    # Fixed values handed to sequenceAlign as its 4th and 5th arguments
    # (presumably the gap-opening and gap-extension penalties).
    insert = 2
    extend = 4

    count = len(seqs)
    rows = []

    # Index 0 is only used to seed the alignment; the loop walks 1..count-1.
    for position in range(1, count):
        newcomer = seqs[position]

        if not rows:
            # Nothing aligned yet: pair the first sequence with the second.
            score, alignedFirst, alignedNew = sequenceAlign(
                seqs[0], newcomer, simMatrix, insert, extend)
            rows.extend((alignedFirst, alignedNew))
            continue

        # Align the newcomer against the consensus of the existing rows.
        target = consensus(rows, threshold)
        score, alignedTarget, alignedNew = sequenceAlign(
            target, newcomer, simMatrix, insert, extend)

        # Record every column where the consensus was given a gap.
        gapColumns = []
        for column, symbol in enumerate(alignedTarget):
            if symbol == '-':
                gapColumns.append(column)

        # Open the same columns in each existing row so all rows stay the
        # same length as the newly aligned sequence.
        for rowIndex, row in enumerate(rows):
            for column in gapColumns:
                row = row[:column] + '-' + row[column:]
            rows[rowIndex] = row

        rows.append(alignedNew)

    for rowIndex, row in enumerate(rows):
        print(rowIndex, row)
def seqStructureBackboneAlign(chainA, chainB, atomNames=set(['CA', 'C', 'N']), simMatrix=BLOSUM62):
    """Superimpose two structures using sequence-matched atoms of two chains.

    The chains' sequences are aligned with ``sequenceAlign``; residues that
    face each other in a gap-free alignment column are selected, restricted
    to ``atomNames``, and used to compute a superposition that is then
    applied to the whole of both parent structures (in place).

    Args:
        chainA, chainB: Chain objects exposing ``structure``, ``residues``
            and ``code`` attributes.
        atomNames: Atom names forwarded to ``filterSubStructure``.
        simMatrix: Similarity matrix forwarded to ``sequenceAlign``.

    Returns:
        The ``(rmsds, atomRmsds)`` pair produced by ``superimposeCoords``.
    """
    structureA, structureB = chainA.structure, chainB.structure
    residuesA, residuesB = chainA.residues, chainB.residues

    score, alignA, alignB = sequenceAlign(
        getChainSequence(chainA), getChainSequence(chainB), simMatrix)

    # Walk the alignment columns, tracking the residue index in each chain.
    matchedA = []
    matchedB = []
    posA = posB = 0
    for column, symbolA in enumerate(alignA):
        # A gap in A is tested first; the code relies on a column never
        # holding a gap in both sequences at once.
        if symbolA == '-':
            posB += 1
            continue
        if alignB[column] == '-':
            posA += 1
            continue
        matchedA.append(posA)
        matchedB.append(posB)
        posA += 1
        posB += 1

    idsA = [residuesA[pos].seqId for pos in matchedA]
    idsB = [residuesB[pos].seqId for pos in matchedB]

    subStructureA = filterSubStructure(structureA, [chainA.code], idsA, atomNames)
    subStructureB = filterSubStructure(structureB, [chainB.code], idsB, atomNames)

    atomsA, coordsA = getAtomCoords(subStructureA)
    atomsB, coordsB = getAtomCoords(subStructureB)

    # Uniform weights sized from A's atoms.
    # NOTE(review): assumes B yields the same number of atoms -- confirm.
    weights = ones(len(atomsA))

    coordsA, centerA = centerCoords(coordsA, weights)
    coordsB, centerB = centerCoords(coordsB, weights)

    c, rmsds, atomRmsds, rotations = superimposeCoords([coordsA, coordsB], weights)

    # Apply each rotation, with the negated centre as the translation
    # argument, to the complete structures rather than the filtered copies.
    affineTransformStructure(structureA, rotations[0], -centerA)
    affineTransformStructure(structureB, rotations[1], -centerB)

    return rmsds, atomRmsds
def seqStructureBackboneAlign(chainA, chainB, atomNames=set(['CA','C','N']), simMatrix=BLOSUM62):
    """Align two chains by sequence, then superimpose their structures.

    Steps:
      1. Align the two chain sequences with ``sequenceAlign``.
      2. Collect the residue positions paired in columns without a gap.
      3. Extract those residues (limited to ``atomNames``) from each
         structure and fetch their coordinates.
      4. Centre both coordinate sets with uniform weights and run
         ``superimposeCoords``.
      5. Transform both full structures in place with the resulting
         rotations and the negated centres.

    Args:
        chainA, chainB: Chain objects providing ``structure``, ``residues``
            and ``code``.
        atomNames: Atom names passed to ``filterSubStructure``.
        simMatrix: Similarity matrix passed to ``sequenceAlign``.

    Returns:
        Tuple ``(rmsds, atomRmsds)`` as returned by ``superimposeCoords``.
    """
    structureA = chainA.structure
    structureB = chainB.structure
    residuesA = chainA.residues
    residuesB = chainB.residues

    seqA = getChainSequence(chainA)
    seqB = getChainSequence(chainB)
    score, alignA, alignB = sequenceAlign(seqA, seqB, simMatrix)

    # (indexInA, indexInB) for every column where both chains have a residue.
    paired = []
    cursorA = 0
    cursorB = 0
    for column in range(len(alignA)):
        if alignA[column] == '-':
            # Gap in A: only B consumed a residue in this column.
            cursorB += 1
        elif alignB[column] == '-':
            # Gap in B: only A consumed a residue in this column.
            cursorA += 1
        else:
            paired.append((cursorA, cursorB))
            cursorA += 1
            cursorB += 1

    filterIdsA = [residuesA[a].seqId for a, _b in paired]
    filterIdsB = [residuesB[b].seqId for _a, b in paired]

    backboneA = filterSubStructure(structureA, [chainA.code], filterIdsA, atomNames)
    backboneB = filterSubStructure(structureB, [chainB.code], filterIdsB, atomNames)

    atomsA, coordsA = getAtomCoords(backboneA)
    atomsB, coordsB = getAtomCoords(backboneB)

    # One unit weight per atom of A; the same weights are used for B.
    # NOTE(review): presumes both selections contain equally many atoms.
    weights = ones(len(atomsA))

    coordsA, centerA = centerCoords(coordsA, weights)
    coordsB, centerB = centerCoords(coordsB, weights)

    coords = [coordsA, coordsB]
    c, rmsds, atomRmsds, rotations = superimposeCoords(coords, weights)

    # The whole structures are transformed, not just the filtered atoms.
    for structure, rotation, center in (
        (structureA, rotations[0], centerA),
        (structureB, rotations[1], centerB),
    ):
        affineTransformStructure(structure, rotation, -center)

    return rmsds, atomRmsds
def getDistanceMatrix(seqs, simMatrix):
    """Return a symmetric matrix of pairwise alignment-based distances.

    For each pair of sequences the distance is the larger of the two
    self-similarity scores (``calcSeqSimilarity(x, x, simMatrix)``) minus
    the score of aligning the pair with ``sequenceAlign``. Diagonal entries
    stay at 0.0.

    Args:
        seqs: Indexable collection of sequences.
        simMatrix: Similarity matrix forwarded to both helper functions.

    Returns:
        A list of ``len(seqs)`` lists, each of length ``len(seqs)``.
    """
    count = len(seqs)
    distances = [[0.0] * count for _ in range(count)]

    # Score of each sequence against itself: the best it could achieve.
    selfScores = [calcSeqSimilarity(seq, seq, simMatrix) for seq in seqs]

    # Visit each unordered pair once (upper triangle) and mirror the value.
    for row in range(count - 1):
        rowSeq = seqs[row]
        for col in range(row + 1, count):
            pairScore, _alignRow, _alignCol = sequenceAlign(rowSeq, seqs[col], simMatrix)
            ceiling = max(selfScores[row], selfScores[col])
            gap = ceiling - pairScore
            distances[row][col] = gap
            distances[col][row] = gap

    return distances
def getDistanceMatrix(seqs, simMatrix):
    """Compute pairwise distances between sequences from alignment scores.

    The distance between sequences ``i`` and ``j`` is
    ``max(self_i, self_j) - score_ij`` where ``self_x`` is
    ``calcSeqSimilarity(x, x, simMatrix)`` and ``score_ij`` is the score
    returned by ``sequenceAlign`` for the pair.

    Args:
        seqs: Indexable collection of sequences.
        simMatrix: Similarity matrix handed to the scoring helpers.

    Returns:
        A square list-of-lists, symmetric, with 0.0 on the diagonal.
    """
    size = len(seqs)

    # Start from an all-zero square matrix; only off-diagonal cells change.
    result = []
    for _ in range(size):
        result.append([0.0] * size)

    bestPossible = [calcSeqSimilarity(item, item, simMatrix) for item in seqs]

    for first in range(size - 1):
        seqA = seqs[first]
        for second in range(first + 1, size):
            seqB = seqs[second]
            alignScore, alignedA, alignedB = sequenceAlign(seqA, seqB, simMatrix)
            # Write both mirrored cells so the matrix stays symmetric.
            result[first][second] = result[second][first] = (
                max(bestPossible[first], bestPossible[second]) - alignScore
            )

    return result
def _insertProfileGaps(seq, alignedProfile):
    """Return ``seq`` with '-' inserted at every column that is None in ``alignedProfile``."""
    # Columns are visited in ascending order and are expressed in aligned
    # coordinates, so each insertion keeps the following indices valid.
    for column, fractions in enumerate(alignedProfile):
        if fractions is None:
            seq = seq[:column] + '-' + seq[column:]
    return seq


def simpleProfileMultipleAlign(seqs, simMatrix):
    """Build a multiple alignment by adding sequences one at a time via profiles.

    The first two sequences are aligned pairwise with ``sequenceAlign``.
    Each remaining sequence is turned into a single-sequence profile and
    aligned with ``profileAlign`` against the profile of the rows collected
    so far. Entries that are ``None`` in an aligned profile are treated as
    gap columns: a '-' is inserted at those columns in the corresponding
    sequences before the new sequence is appended.

    Args:
        seqs: Indexable collection of sequences (strings).
        simMatrix: Similarity matrix forwarded to ``sequenceAlign`` and
            ``profileAlign``.

    Returns:
        A list of aligned sequences in input order. With fewer than two
        input sequences there is nothing to align, so a list copy of the
        input is returned (previously this raised ``IndexError``).
    """
    n = len(seqs)
    if n < 2:
        # Zero or one sequence: trivially "aligned" already.
        return list(seqs)

    score, alignA, alignB = sequenceAlign(seqs[0], seqs[1], simMatrix)
    multipleAlign = [alignA, alignB]

    for i in range(2, n):
        profA = profile(multipleAlign)
        toAdd = [seqs[i]]
        profB = profile(toAdd)

        score, alignA, alignB = profileAlign(profA, profB, simMatrix)

        # Gaps opened in the existing profile go into every existing row;
        # gaps opened in the new profile go into the sequence being added.
        multipleAlign = [_insertProfileGaps(seq, alignA) for seq in multipleAlign]
        toAdd = [_insertProfileGaps(seq, alignB) for seq in toAdd]

        multipleAlign.extend(toAdd)

    return multipleAlign