Ejemplo n.º 1
0
def align(s1, s2, gapOpen, gapExtend):
   """Globally align two sequences through the wrapped C DP routine.

   The inputs are copied into ``dpAlign.doubleArray`` buffers, handed to
   ``dpAlign.CalcGlobalAlignment``, and the result is copied back into
   plain Python objects before the C-side result structure is released.

   Args:
      s1: First sequence. Must support ``len()`` and integer indexing
         ``s1[0] .. s1[len(s1) - 1]``. Elements are stored in a
         ``doubleArray``, so they are presumably numeric -- confirm
         against the callers.
      s2: Second sequence, same requirements as ``s1``.
      gapOpen: Passed unchanged to ``CalcGlobalAlignment``.
      gapExtend: Passed unchanged to ``CalcGlobalAlignment``.

   Returns:
      A tuple ``(score, seq1, lenSeq1, seq2, lenSeq2)``:
      ``score`` is the value returned by ``dpAlign.getDPVal``;
      ``lenSeq1`` / ``lenSeq2`` are the lengths reported by
      ``dpAlign.getSeqLen`` for sequence 1 and 2;
      ``seq1`` / ``seq2`` are dicts mapping every position
      ``0 .. lenSeqN - 1`` to the value returned by ``dpAlign.getSeq``.
   """
   n1 = len(s1)
   n2 = len(s2)
   # instantiate the data type to hand to the wrapped C code
   t1 = dpAlign.doubleArray(n1)
   t2 = dpAlign.doubleArray(n2)
   # Populate the buffers. Index instead of iterating so that inputs which
   # are int-keyed mappings (like the dicts this function returns) keep
   # working exactly as before.
   for i in range(n1):
      t1[i] = s1[i]
   for i in range(n2):
      t2[i] = s2[i]
   # Calculate the alignment with DP
   alignStruct = dpAlign.CalcGlobalAlignment(t1, n1, t2, n2,
                                             gapOpen, gapExtend)
   try:
      score = dpAlign.getDPVal(alignStruct)
      lenSeq1 = dpAlign.getSeqLen(alignStruct, 1)
      lenSeq2 = dpAlign.getSeqLen(alignStruct, 2)
      seq1 = {i: dpAlign.getSeq(alignStruct, 1, i) for i in range(lenSeq1)}
      seq2 = {i: dpAlign.getSeq(alignStruct, 2, i) for i in range(lenSeq2)}
   finally:
      # Release the C-side structure even if one of the accessors above
      # raises; otherwise it would never be freed.
      dpAlign.freeAlignInfo(alignStruct)
   return score, seq1, lenSeq1, seq2, lenSeq2
Ejemplo n.º 2
0
def assessAlignment(Prot, alignment):
   """Assess pairwise alignment scores and plot the resulting ROC curve.

   Builds a family-ID vector and a flattened square score matrix for the
   wrapped C routine ``dpAlign.assessAlignment``, copies its results into
   Python objects, frees the C-side structure, then draws the ROC curve
   and saves it to ``../tmp/ROC.png`` (creating ``../tmp/`` if needed).

   Args:
      Prot: Mapping from protein name to an object with a ``familyID``
         attribute convertible with ``int()``. Its iteration order fixes
         the row/column order of the score matrix.
      alignment: Mapping indexed by ``(name_i, name_j, 'score')`` for
         every ordered pair of distinct protein names in ``Prot``.

   Returns:
      A tuple ``(numCorrect, TPR, FPR, ROCarea, partitionSize, maxScore,
      minScore, bestDiscernmentVal, bestTPR, bestFPR)``. ``TPR`` and
      ``FPR`` are ``(partitionSize, 1)`` arrays created with ``zeros``;
      every other entry is the value returned by the ``dpAlign`` getter
      of the same name.

   Raises:
      KeyError: Presumably, if ``alignment`` lacks a required score entry
         (depends on the mapping type passed in).
   """
   # Freeze the protein order once so indices and names stay consistent.
   names = list(Prot)
   n = len(names)

   # build the data structures for the wrapped C code
   familyID = dpAlign.intArray(n)
   alignScore = dpAlign.doubleArray(n**2)
   for i, rowName in enumerate(names):
      familyID[i] = int(Prot[rowName].familyID)
      for j, colName in enumerate(names):
         # Row-major layout; the diagonal (self-alignment) is set to 0.
         if i != j:
            alignScore[i*n + j] = alignment[(rowName, colName, 'score')]
         else:
            alignScore[i*n + j] = 0

   assessmentStruct = dpAlign.assessAlignment(familyID, alignScore, n)
   try:
      numCorrect = dpAlign.getNumCorrect(assessmentStruct)
      ROCarea = dpAlign.getROCarea(assessmentStruct)
      partitionSize = dpAlign.getPartitionSize(assessmentStruct)
      TPR = zeros((partitionSize, 1))
      FPR = zeros((partitionSize, 1))
      for i in range(partitionSize):
         TPR[i] = dpAlign.getTPR(assessmentStruct, i)
         FPR[i] = dpAlign.getFPR(assessmentStruct, i)
      maxScore = dpAlign.getMaxScore(assessmentStruct)
      minScore = dpAlign.getMinScore(assessmentStruct)
      bestDiscernmentVal = dpAlign.getBestDiscernmentVal(assessmentStruct)
      bestTPR = dpAlign.getBestTPR(assessmentStruct)
      bestFPR = dpAlign.getBestFPR(assessmentStruct)
   finally:
      # Release the C-side structure even if one of the getters above
      # raises; otherwise it would never be freed.
      dpAlign.freeAssessInfo(assessmentStruct)

   # ROC curve plus a dotted diagonal reference line.
   plot(FPR, TPR, linewidth=2.0)
   plot(arange(0.0,1.01,0.01), arange(0.0,1.01,0.01),\
        linewidth=1.0, linestyle=':')
   title('ROC Curve')
   axis([-0.1, 1.1, -0.1, 1.1])
   xlabel('False Positive Rate (FPR)')
   ylabel('True Positive Rate (TPR)')
   if not os.path.exists('../tmp/'):
      os.makedirs('../tmp/')
   savefig("../tmp/ROC.png")
   return numCorrect, TPR, FPR, ROCarea, partitionSize,\
          maxScore, minScore, bestDiscernmentVal, bestTPR, bestFPR