def computeSimilarity(dataArray):
    """Build the symmetric pairwise similarity matrix of all samples.

    Every sample ID found in ``dataArray[8]`` is compared with itself and
    with every later sample. For a pair, the score is
    ``sumOpInf(patternRatio, totalRatio)`` from which the absolute
    difference of the two samples' diversity coefficients is subtracted
    (the subtraction is skipped when that difference is zero).

    Args:
        dataArray: project data container. This function reads
            ``dataArray[5]`` (handed to ``computeDiversityCoefficient``),
            ``dataArray[7]`` (handed to ``compute`` and to the pattern
            enumerators) and ``dataArray[8]`` (the list of sample IDs).

    Returns:
        An ``n x n`` numpy array, ``n`` being the number of sample IDs,
        with ``matrix[i][j] == matrix[j][i]``.

    Side effects:
        Prints the elapsed wall-clock time on standard output.
    """
    start = time()
    sampleIDList = dataArray[8]
    n = len(sampleIDList)
    matrix = np.zeros((n, n))

    # A sample's diversity coefficient does not depend on the sample it is
    # paired with, so it is computed once per sample instead of once per
    # pair. NOTE(review): assumes computeDiversityCoefficient is
    # deterministic and has no side effects -- confirm.
    diversityList = [
        computeDiversityCoefficient(dataArray[5], [sampleID], dataArray)[0]
        for sampleID in sampleIDList
    ]

    for i in range(n):
        sample1 = sampleIDList[i]
        # Only the upper triangle is computed; the result is mirrored.
        for j in range(i, n):
            sample2 = sampleIDList[j]

            # Total ratio computation
            common, in1, in2, _, _, _, _, _ = compute(
                dataArray[7], [sample1], [sample2])
            commonA = countAssignmentsInCommon(common, [sample1], [sample2])
            numberA1 = countAssignments(in1, [sample1])
            numberA2 = countAssignments(in2, [sample2])
            tratio = totalRatio(commonA, numberA1, numberA2)

            # Pattern ratio computation
            commonPatternsList = enumerateCommonPatterns(
                dataArray[7], [sample1], [sample2])
            specificPatternsList1 = enumerateSpecificPatterns(
                dataArray[7], [sample1], [sample2])
            # The second list used to be produced by a call identical to
            # the first one, so both "specific" lists were always equal.
            # NOTE(review): arguments are swapped on the assumption that
            # enumerateSpecificPatterns returns the patterns specific to
            # its first sample list -- confirm against its definition.
            specificPatternsList2 = enumerateSpecificPatterns(
                dataArray[7], [sample2], [sample1])
            pRatio = patternRatio(
                commonPatternsList, specificPatternsList1,
                specificPatternsList2)

            # Diversity coefficient
            subdRatio = abs(diversityList[i] - diversityList[j])

            s = sumOpInf(pRatio, tratio)
            # Kept conditional on purpose: the subtraction is only applied
            # when there is an actual difference, exactly as before.
            if subdRatio:
                s = s - subdRatio

            matrix[i][j] = s
            matrix[j][i] = s

    end = time()
    # Single-argument parenthesised form prints the same text under
    # Python 2 and Python 3.
    print("TIME: " + str(end - start))
    return matrix
def totalDiffRatioAct(dataArray):
    """Interactively compare two groups of samples with Total/Diff ratios.

    The user is asked (through ``createSampleNameList``) for two groups of
    samples. The total ratio, the diff ratio and their normalized variants
    are computed from the assignments the groups have in common, printed,
    and, if the user answers ``Y``, written out as a text report through
    ``writeFile``.

    Args:
        dataArray: project data container. It is passed as a whole to
            ``createSampleNameList``; ``dataArray[7]`` is passed to
            ``compute``.

    Returns:
        None.
    """
    def selectionNote(metadata, lowerBounds, upperBounds):
        # Report line describing the metadata filter of one group; empty
        # when the group was not selected on metadata.
        if not metadata:
            return ""
        return ("selected on metadata: " + str(metadata)
                + " with extremum values: " + str(lowerBounds)
                + " (lower bounds) and " + str(upperBounds)
                + " (upper bounds) \n")

    print("First list of samples.")
    names1, meta1, lows1, highs1 = createSampleNameList(dataArray)
    print("Second list of samples.")
    names2, meta2, lows2, highs2 = createSampleNameList(dataArray)

    shared, _, _, count1, count2, _, _, _ = compute(
        dataArray[7], names1, names2)
    sharedCount = countAssignmentsInCommon(shared, names1, names2)

    totalValue = totalRatio(sharedCount, count1, count2)
    totalNormalized = totalRatioNormalized(sharedCount, count1, count2)
    diffValue = diffRatio(sharedCount)
    diffNormalized = diffRatioNormalized(sharedCount, count1, count2)

    screenLines = (
        "\nTotal Ratio Distance is: " + str(totalValue),
        "normalized Total Ratio is: " + str(totalNormalized)
        + "\n[The more it is close to 1, the more the two groups are alike]\n",
        "Diff Ratio Distance is: " + str(diffValue),
        "normalized Diff Ratio is: " + str(diffNormalized)
        + "\n[The more it is close to 0, the more the two groups are alike]\n",
        "[If you have obtained +inf (resp. -inf), it could mean you have selected no sample.]\n",
    )
    for line in screenLines:
        print(line)

    answer = raw_input("Save the results? Y/N\n")
    if answer != "Y":
        # Anything other than an explicit "N" is reported as a bad answer.
        if answer != "N":
            print("/!\\ You should answer 'Y' or 'N'!")
        return

    reportParts = [
        "Total Ratio Results ****\n for " + str(names1) + "\n",
        selectionNote(meta1, lows1, highs1),
        " and " + str(names2) + "\n",
        selectionNote(meta2, lows2, highs2),
        "\nTotal Ratio Distance is: " + str(totalValue),
        "\n normalized Total Ratio is: " + str(totalNormalized),
        "\nDiff Ratio Distance is: " + str(diffValue),
        "\n normalized Diff Ratio is: " + str(diffNormalized),
        "\n\nEND OF FILE ****",
    ]
    writeFile("".join(reportParts), "", "text")