Ejemplo n.º 1
0
def consensusMultipleAlign(seqs, threshold, simMatrix):
    """Build a multiple alignment by aligning each sequence to a consensus.

    The first two sequences are aligned with ``sequenceAlign``. Every later
    sequence is aligned against ``consensus(multipleAlign, threshold)``;
    wherever that alignment places a gap ('-') in the consensus, the same
    gap is inserted into every row already in the alignment before the
    newly aligned sequence is appended.

    Args:
        seqs: Indexable collection of sequences, in the order they are added.
        threshold: Passed through unchanged to ``consensus``.
        simMatrix: Passed through unchanged to ``sequenceAlign``.

    Returns:
        The list of aligned rows. With fewer than two input sequences there
        is nothing to align, so the input is returned as a new list. Each
        row is also printed as ``index row``.
    """
    n = len(seqs)

    if n < 2:
        # Nothing to align against: the input is already its own alignment.
        multipleAlign = list(seqs)
    else:
        _, alignA, alignB = sequenceAlign(seqs[0], seqs[1], simMatrix)
        multipleAlign = [alignA, alignB]

    for j in range(2, n):
        consensusSeq = consensus(multipleAlign, threshold)
        _, alignA, alignB = sequenceAlign(consensusSeq, seqs[j], simMatrix)

        # Columns where the pairwise alignment opened a gap in the consensus.
        gaps = [k for k, letter in enumerate(alignA) if letter == '-']

        for k, seq in enumerate(multipleAlign):
            # gaps is ascending and expressed in aligned coordinates, so each
            # insertion already accounts for the ones made before it.
            for gap in gaps:
                seq = seq[:gap] + '-' + seq[gap:]

            multipleAlign[k] = seq

        multipleAlign.append(alignB)

    for k, seq in enumerate(multipleAlign):
        print(k, seq)

    return multipleAlign
Ejemplo n.º 2
0
def consensusMultipleAlign(seqs, threshold, simMatrix, insert=2, extend=4):
    """Build a multiple alignment by aligning each sequence to a consensus.

    The first two sequences are aligned pairwise. Each later sequence is
    aligned against the consensus of the rows collected so far, and every
    gap ('-') that alignment opens in the consensus is copied into all
    existing rows before the new aligned sequence is appended.

    Args:
        seqs: Indexable collection of sequences, in the order they are added.
        threshold: Passed through unchanged to ``consensus``.
        simMatrix: Passed through unchanged to ``sequenceAlign``.
        insert: Fourth positional argument to ``sequenceAlign``; presumably
            the gap-opening penalty (confirm against ``sequenceAlign``).
        extend: Fifth positional argument to ``sequenceAlign``; presumably
            the gap-extension penalty (confirm against ``sequenceAlign``).

    Returns:
        The list of aligned rows. With fewer than two input sequences there
        is nothing to align, so the input is returned as a new list. Each
        row is also printed as ``index row``.
    """
    n = len(seqs)

    # A lone sequence (or no sequence) is already its own alignment.
    multipleAlign = list(seqs) if n < 2 else []

    # j starts at 1, so seqB begins as the SECOND sequence; the first
    # sequence is the initial alignment partner.
    for j in range(1, n):
        seqB = seqs[j]

        if not multipleAlign:
            # First pass: plain pairwise alignment of the first two inputs.
            _, alignA, alignB = sequenceAlign(seqs[0], seqB, simMatrix,
                                              insert, extend)
            multipleAlign = [alignA, alignB]
            continue

        # Later passes: align the new sequence to the current consensus.
        seqA = consensus(multipleAlign, threshold)
        _, alignA, alignB = sequenceAlign(seqA, seqB, simMatrix,
                                          insert, extend)

        # Columns where the alignment opened a gap in the consensus.
        gaps = [k for k, letter in enumerate(alignA) if letter == '-']

        for k, seq in enumerate(multipleAlign):
            # gaps is ascending and in aligned coordinates, so inserting in
            # order needs no index correction.
            for gap in gaps:
                seq = seq[:gap] + '-' + seq[gap:]

            multipleAlign[k] = seq

        # alignB already carries its own gaps relative to the consensus.
        multipleAlign.append(alignB)

    for k, seq in enumerate(multipleAlign):
        print(k, seq)

    return multipleAlign
Ejemplo n.º 3
0
def seqStructureBackboneAlign(chainA,
                              chainB,
                              atomNames=set(['CA', 'C', 'N']),
                              simMatrix=BLOSUM62):
    """Superimpose two chains on the residues paired by sequence alignment.

    The chain sequences are aligned with ``sequenceAlign``; columns holding
    a residue in both rows define the residue pairs. The atoms named in
    ``atomNames`` are extracted for those residues, centred, and passed to
    ``superimposeCoords``. The resulting rotations and negated centres are
    then applied to the parent structures via ``affineTransformStructure``
    (presumably modifying them in place; confirm against that helper).

    Args:
        chainA: Chain providing ``structure``, ``residues`` and ``code``.
        chainB: Second chain, same attributes.
        atomNames: Atom names handed to ``filterSubStructure``.
        simMatrix: Similarity matrix handed to ``sequenceAlign``.

    Returns:
        The ``(rmsds, atomRmsds)`` pair produced by ``superimposeCoords``.
    """
    structureA, structureB = chainA.structure, chainB.structure
    residuesA, residuesB = chainA.residues, chainB.residues

    seqA = getChainSequence(chainA)
    seqB = getChainSequence(chainB)
    _score, alignA, alignB = sequenceAlign(seqA, seqB, simMatrix)

    # Walk the alignment columns, tracking the residue index in each chain.
    # A column never holds a dash in both rows.
    matched = []  # (index into residuesA, index into residuesB)
    idxA = idxB = 0
    for col, symbolA in enumerate(alignA):
        if symbolA == '-':
            # Only chain B has a residue here.
            idxB += 1
            continue

        if alignB[col] != '-':
            # Residue present in both chains: record the pair.
            matched.append((idxA, idxB))
            idxB += 1

        idxA += 1

    filterIdsA = [residuesA[a].seqId for a, _ in matched]
    filterIdsB = [residuesB[b].seqId for _, b in matched]

    backboneStrucA = filterSubStructure(structureA, [chainA.code],
                                        filterIdsA, atomNames)
    backboneStrucB = filterSubStructure(structureB, [chainB.code],
                                        filterIdsB, atomNames)

    atomsA, coordsA = getAtomCoords(backboneStrucA)
    _atomsB, coordsB = getAtomCoords(backboneStrucB)

    # One uniform weight per atom of A; the same weights are reused for B.
    weights = ones(len(atomsA))

    coordsA, centerA = centerCoords(coordsA, weights)
    coordsB, centerB = centerCoords(coordsB, weights)

    _c, rmsds, atomRmsds, rotations = superimposeCoords([coordsA, coordsB],
                                                        weights)

    affineTransformStructure(structureA, rotations[0], -centerA)
    affineTransformStructure(structureB, rotations[1], -centerB)

    return rmsds, atomRmsds
Ejemplo n.º 4
0
def seqStructureBackboneAlign(chainA, chainB,
                              atomNames=set(['CA','C','N']),
                              simMatrix=BLOSUM62):
  """Superimpose two chains using sequence-aligned residue pairs.

  Aligns the two chain sequences, keeps only alignment columns that hold a
  residue in both chains, extracts the ``atomNames`` atoms of those
  residues from each structure, centres both coordinate sets and feeds them
  to ``superimposeCoords``. Each parent structure is then passed to
  ``affineTransformStructure`` with its rotation and negated centre
  (presumably an in-place transform; confirm against that helper).

  Args:
    chainA: Chain providing ``structure``, ``residues`` and ``code``.
    chainB: Second chain, same attributes.
    atomNames: Atom names forwarded to ``filterSubStructure``.
    simMatrix: Similarity matrix forwarded to ``sequenceAlign``.

  Returns:
    Tuple ``(rmsds, atomRmsds)`` as returned by ``superimposeCoords``.
  """
  structureA = chainA.structure
  structureB = chainB.structure
  residuesA = chainA.residues
  residuesB = chainB.residues

  _score, alignA, alignB = sequenceAlign(getChainSequence(chainA),
                                         getChainSequence(chainB),
                                         simMatrix)

  # Residue indices (one list per chain) of columns without any dash.
  # The alignment never has dashes in both rows at the same column.
  pairedPosA = []
  pairedPosB = []
  cursorA = cursorB = 0

  for column in range(len(alignA)):
    gapInA = alignA[column] == '-'
    # alignB is consulted only when chain A has a residue in this column.
    gapInB = (not gapInA) and alignB[column] == '-'

    if not (gapInA or gapInB):
      pairedPosA.append(cursorA)
      pairedPosB.append(cursorB)

    # Advance whichever chain contributed a residue to this column.
    if not gapInA:
      cursorA += 1
    if not gapInB:
      cursorB += 1

  filterIdsA = [residuesA[pos].seqId for pos in pairedPosA]
  filterIdsB = [residuesB[pos].seqId for pos in pairedPosB]

  backboneStrucA = filterSubStructure(structureA, [chainA.code],
                                      filterIdsA, atomNames)
  backboneStrucB = filterSubStructure(structureB, [chainB.code],
                                      filterIdsB, atomNames)

  atomsA, coordsA = getAtomCoords(backboneStrucA)
  _atomsB, coordsB = getAtomCoords(backboneStrucB)

  # Uniform weights sized from chain A's atoms, shared by both chains.
  weights = ones(len(atomsA))

  coordsA, centerA = centerCoords(coordsA, weights)
  coordsB, centerB = centerCoords(coordsB, weights)

  fitResult = superimposeCoords([coordsA, coordsB], weights)
  _c, rmsds, atomRmsds, rotations = fitResult

  affineTransformStructure(structureA, rotations[0], -centerA)
  affineTransformStructure(structureB, rotations[1], -centerB)

  return rmsds, atomRmsds
Ejemplo n.º 5
0
def getDistanceMatrix(seqs, simMatrix):
  """Return a symmetric matrix of pairwise distances between sequences.

  For each pair the distance is the larger of the two self-similarity
  scores (``calcSeqSimilarity`` of a sequence with itself) minus the score
  returned by ``sequenceAlign`` for the pair. The diagonal stays 0.0.

  Args:
    seqs: Indexable collection of sequences.
    simMatrix: Similarity matrix forwarded to both scoring helpers.

  Returns:
    A list of ``len(seqs)`` rows, each a list of ``len(seqs)`` distances.
  """
  size = len(seqs)
  distances = [[0.0] * size for _ in range(size)]
  selfScores = [calcSeqSimilarity(seq, seq, simMatrix) for seq in seqs]

  # Visit each unordered pair once (upper triangle) and mirror the value.
  for row in range(size - 1):
    for col in range(row + 1, size):
      pairScore, _, _ = sequenceAlign(seqs[row], seqs[col], simMatrix)
      distance = max(selfScores[row], selfScores[col]) - pairScore
      distances[row][col] = distances[col][row] = distance

  return distances
Ejemplo n.º 6
0
def getDistanceMatrix(seqs, simMatrix):
    """Compute pairwise alignment distances for a collection of sequences.

    The distance between two sequences is ``max(selfA, selfB) - score``,
    where ``selfA``/``selfB`` come from ``calcSeqSimilarity`` of each
    sequence with itself and ``score`` is the first value returned by
    ``sequenceAlign`` for the pair.

    Args:
        seqs: Indexable collection of sequences.
        simMatrix: Similarity matrix forwarded to both scoring helpers.

    Returns:
        A square list-of-lists, symmetric, with 0.0 on the diagonal.
    """
    total = len(seqs)
    table = [[0.0] * total for _ in range(total)]
    selfScores = [calcSeqSimilarity(seq, seq, simMatrix) for seq in seqs]

    # Index pairs of the upper triangle, in row-major order.
    pairs = [(i, j) for i in range(total - 1) for j in range(i + 1, total)]

    for i, j in pairs:
        score, _alignA, _alignB = sequenceAlign(seqs[i], seqs[j], simMatrix)
        dist = max(selfScores[i], selfScores[j]) - score

        # The distance is symmetric, so fill both halves.
        table[i][j] = dist
        table[j][i] = dist

    return table
Ejemplo n.º 7
0
def _insertProfileGaps(seq, alignedProfile):
    """Return seq with '-' inserted wherever alignedProfile holds None."""
    # Positions are visited in ascending order and are expressed in aligned
    # coordinates, so earlier insertions need no index correction.
    for pos, fractions in enumerate(alignedProfile):
        if fractions is None:
            seq = seq[:pos] + '-' + seq[pos:]

    return seq


def simpleProfileMultipleAlign(seqs, simMatrix):
    """Build a multiple alignment by progressive profile alignment.

    The first two sequences are aligned with ``sequenceAlign``. Each later
    sequence is turned into a single-sequence profile and aligned, with
    ``profileAlign``, against the profile of the rows collected so far.
    ``None`` entries in the aligned profiles mark gap columns; matching
    '-' characters are inserted into the existing rows and into the new
    sequence before it is appended.

    Args:
        seqs: Indexable collection of sequences, in the order they are added.
        simMatrix: Similarity matrix forwarded to ``sequenceAlign`` and
            ``profileAlign``.

    Returns:
        The list of aligned rows. With fewer than two input sequences there
        is nothing to align, so the input is returned as a new list.
    """
    n = len(seqs)

    if n < 2:
        # Avoids indexing seqs[1] on empty or single-sequence input.
        return list(seqs)

    _, alignA, alignB = sequenceAlign(seqs[0], seqs[1], simMatrix)
    multipleAlign = [alignA, alignB]

    for i in range(2, n):
        profA = profile(multipleAlign)
        profB = profile([seqs[i]])

        _, alignA, alignB = profileAlign(profA, profB, simMatrix)

        # Gaps opened in the existing profile go into every existing row;
        # gaps opened in the new profile go into the new sequence.
        multipleAlign = [_insertProfileGaps(seq, alignA)
                         for seq in multipleAlign]
        multipleAlign.append(_insertProfileGaps(seqs[i], alignB))

    return multipleAlign