def align(s1, s2, gapOpen, gapExtend):
    """Globally align two sequences using the wrapped C routine in ``dpAlign``.

    The inputs are copied element by element into ``dpAlign.doubleArray``
    buffers, handed to ``dpAlign.CalcGlobalAlignment`` and the result is
    read back out of the returned alignment struct.

    Args:
        s1: First sequence; must support ``len()`` and integer indexing.
            Elements are stored in a ``doubleArray``, so they are presumably
            numeric -- TODO confirm against callers.
        s2: Second sequence, same requirements as ``s1``.
        gapOpen: Passed through unchanged to ``CalcGlobalAlignment``.
        gapExtend: Passed through unchanged to ``CalcGlobalAlignment``.

    Returns:
        A tuple ``(score, seq1, lenSeq1, seq2, lenSeq2)`` where ``score`` is
        ``dpAlign.getDPVal`` of the alignment, ``seq1``/``seq2`` are dicts
        mapping position ``i`` to ``dpAlign.getSeq(alignStruct, k, i)`` for
        ``k`` = 1 and 2, and ``lenSeq1``/``lenSeq2`` are the lengths reported
        by ``dpAlign.getSeqLen``.
    """
    len1 = len(s1)
    len2 = len(s2)

    # Instantiate the data type to hand to the wrapped C code.
    t1 = dpAlign.doubleArray(len1)
    t2 = dpAlign.doubleArray(len2)

    # Populate the C arrays.  Index-based access is kept on purpose: it works
    # both for real sequences and for int-keyed dicts like the ones returned
    # by this function.
    for i in range(len1):
        t1[i] = s1[i]
    for i in range(len2):
        t2[i] = s2[i]

    # Calculate the alignment with DP.
    alignStruct = dpAlign.CalcGlobalAlignment(t1, len1, t2, len2,
                                              gapOpen, gapExtend)
    try:
        score = dpAlign.getDPVal(alignStruct)
        lenSeq1 = dpAlign.getSeqLen(alignStruct, 1)
        lenSeq2 = dpAlign.getSeqLen(alignStruct, 2)

        seq1 = {}
        seq2 = {}
        for i in range(lenSeq1):
            seq1[i] = dpAlign.getSeq(alignStruct, 1, i)
        for i in range(lenSeq2):
            seq2[i] = dpAlign.getSeq(alignStruct, 2, i)
    finally:
        # Always release the C-side struct, even if a getter above raised;
        # otherwise the allocation would leak.
        dpAlign.freeAlignInfo(alignStruct)

    return score, seq1, lenSeq1, seq2, lenSeq2
def assessAlignment(Prot, alignment):
    """Assess pairwise alignment scores via ``dpAlign`` and plot a ROC curve.

    Builds a flat family-ID array and a flat ``len(Prot) x len(Prot)`` score
    array (row-major, diagonal set to 0), passes them to
    ``dpAlign.assessAlignment``, reads the results out of the returned
    struct, and saves a ROC plot to ``../tmp/ROC.png`` (creating ``../tmp/``
    if it does not exist).

    Args:
        Prot: Mapping from protein name to an object with a ``familyID``
            attribute convertible with ``int()``.
        alignment: Mapping indexed by ``(name_i, name_j, 'score')`` giving
            the alignment score for every ordered pair of distinct names.

    Returns:
        A tuple ``(numCorrect, TPR, FPR, ROCarea, partitionSize, maxScore,
        minScore, bestDiscernmentVal, bestTPR, bestFPR)``.  ``TPR`` and
        ``FPR`` are ``(partitionSize, 1)`` arrays created with ``zeros``; all
        other values come straight from the matching ``dpAlign.get*`` calls.
    """
    numProt = len(Prot)
    familyID = dpAlign.intArray(numProt)
    alignScore = dpAlign.doubleArray(numProt ** 2)

    # Freeze one ordering of the protein names so that index i refers to the
    # same protein in both familyID and alignScore.
    nameList = list(Prot)

    # Build the data structures for the wrapped C code.
    for i in range(numProt):
        familyID[i] = int(Prot[nameList[i]].familyID)
        for j in range(numProt):
            if i != j:
                alignScore[i * numProt + j] = \
                    alignment[(nameList[i], nameList[j], 'score')]
            else:
                # No self-alignment score is looked up; the diagonal is 0.
                alignScore[i * numProt + j] = 0

    assessmentStruct = dpAlign.assessAlignment(familyID, alignScore, numProt)
    try:
        numCorrect = dpAlign.getNumCorrect(assessmentStruct)
        ROCarea = dpAlign.getROCarea(assessmentStruct)
        partitionSize = dpAlign.getPartitionSize(assessmentStruct)

        TPR = zeros((partitionSize, 1))
        FPR = zeros((partitionSize, 1))
        for i in range(partitionSize):
            TPR[i] = dpAlign.getTPR(assessmentStruct, i)
            FPR[i] = dpAlign.getFPR(assessmentStruct, i)

        maxScore = dpAlign.getMaxScore(assessmentStruct)
        minScore = dpAlign.getMinScore(assessmentStruct)
        bestDiscernmentVal = dpAlign.getBestDiscernmentVal(assessmentStruct)
        bestTPR = dpAlign.getBestTPR(assessmentStruct)
        bestFPR = dpAlign.getBestFPR(assessmentStruct)
    finally:
        # Always release the C-side struct, even if a getter above raised;
        # otherwise the allocation would leak.
        dpAlign.freeAssessInfo(assessmentStruct)

    # ROC curve plus the dotted diagonal y = x for reference.
    plot(FPR, TPR, linewidth=2.0)
    plot(arange(0.0, 1.01, 0.01), arange(0.0, 1.01, 0.01),
         linewidth=1.0, linestyle=':')
    title('ROC Curve')
    axis([-0.1, 1.1, -0.1, 1.1])
    xlabel('False Positive Rate (FPR)')
    ylabel('True Positive Rate (TPR)')

    if not os.path.exists('../tmp/'):
        os.makedirs('../tmp/')
    savefig("../tmp/ROC.png")

    return numCorrect, TPR, FPR, ROCarea, partitionSize,\
        maxScore, minScore, bestDiscernmentVal, bestTPR, bestFPR