Ejemplo n.º 1
0
def computeSimilarity(dataArray):
    start = time()
    sampleIDList = dataArray[8]
    n = len(sampleIDList)
    matrix = np.zeros((n,n))
    for i in range(n):
        for j in range(i,n):
            #Total ratio computation
            common,in1,in2,_,_,_,_,_ = compute(dataArray[7],[sampleIDList[i]],[sampleIDList[j]])
            commonA = countAssignmentsInCommon(common,[sampleIDList[i]],[sampleIDList[j]])
            numberA1 = countAssignments(in1,[sampleIDList[i]])
            numberA2 = countAssignments(in2,[sampleIDList[j]])
            tratio = totalRatio(commonA,numberA1,numberA2)
            #Pattern ratio computation
            commonPatternsList = enumerateCommonPatterns(dataArray[7],[sampleIDList[i]],[sampleIDList[j]])
            specificPatternsList1 = enumerateSpecificPatterns(dataArray[7],[sampleIDList[i]],[sampleIDList[j]])
            specificPatternsList2 = enumerateSpecificPatterns(dataArray[7],[sampleIDList[i]],[sampleIDList[j]])
            pRatio = patternRatio(commonPatternsList,specificPatternsList1,specificPatternsList2)
            #Diversity coefficient
            dRatio1,_ = computeDiversityCoefficient(dataArray[5],[sampleIDList[i]],dataArray)
            dRatio2,_ = computeDiversityCoefficient(dataArray[5],[sampleIDList[j]],dataArray)
            subdRatio = abs(dRatio1 - dRatio2)
            if subdRatio:
                s = sumOpInf(pRatio,tratio) - subdRatio
            else:
                s = sumOpInf(pRatio,tratio)
            matrix[i][j] = s
            matrix[j][i] = s
    end = time()
    print "TIME:",(end-start)
    return matrix
Ejemplo n.º 2
0
def patternRatioAct(dataArray):
    print "First list of samples."
    sampleNameList1,metadata1,interval11,interval21 = createSampleNameList(dataArray)
    print "Second list of samples."
    sampleNameList2,metadata2,interval12,interval22 = createSampleNameList(dataArray)
    commonPatternsList = enumerateCommonPatterns(dataArray[7],sampleNameList1,sampleNameList2)
    specificPatternsList1 = enumerateSpecificPatterns(dataArray[7],sampleNameList1,sampleNameList2)
    specificPatternsList2 = enumerateSpecificPatterns(dataArray[7],sampleNameList2,sampleNameList1)
    pRatio = patternRatio(commonPatternsList,specificPatternsList1,specificPatternsList2)
    #Only printing patterns of length > 1
    print "\n--- Total number of common patterns: ",len(commonPatternsList)
    print "--- Common patterns of length > 1 ---"
    if commonPatternsList:
        for x in commonPatternsList:
            if len(x[0]) > 1:
                print x[0]
    else:
        print "No pattern of length > 1."
    print "\n--- Total number of specific patterns in",sampleNameList1
    if metadata1:
        print "selected on metadata: ",str(metadata1),"with lower and upper bounds being",str(interval11),"and",str(interval21),":"
    print len(specificPatternsList1)
    print "--- Specific patterns of length > 1 in",sampleNameList1,"---"
    if specificPatternsList1:
        for x in specificPatternsList1:
            if len(x[0]) > 1:
                print x[0]
    else:
        print "No pattern of length > 1."
    print "\n--- Total number of specific patterns in",sampleNameList2
    if metadata2:
        print "selected on metadata: ",str(metadata2),"with lower and upper bounds being",str(interval12),"and",str(interval22),":"
    print len(specificPatternsList2)
    print "--- Specific patterns of length > 1 in",sampleNameList2,"---"
    if specificPatternsList2:
        for x in specificPatternsList2:
            if len(x[0]) > 1:
                print x[0]
    else:
        print "No pattern of length > 1."
    print "\nPattern Ratio is: ",pRatio,"\n"
    print "[ If pattern ratio is superior to one, it means the two groups of samples are quite alike. Please read README ]"
    print "[ If you obtained +inf, if there are common patterns (of length 1 or superior to 1), it could mean both groups of samples contain exactly the same set of nodes. If there is no common pattern, it could mean there is no sample in both groups ]\n"
    answer = raw_input("Save the results? Y/N\n")
    if (answer == "Y"):
        data = "Pattern Ratio Results ****\nfor lists of samples " + str(sampleNameList1) + "\n"
        if metadata1:
            data += "selected on metadata: " + str(metadata1) + " with lower and upper bounds being " + str(interval11) + " and " + str(interval21) + "\n"
        data += "\nand " + str(sampleNameList2) + "\n"
        if metadata2:
            data += "selected on metadata: " + str(metadata2) + " with lower and upper bounds being " + str(interval12) + " and " + str(interval22) + "\n"
        data += "\n-> Pattern Ratio is: " + str(pRatio) + "\n\nPrinting patterns: first is the list of nodes in the pattern, then the total number of assignations in this pattern and eventually the total number of nodes in the pattern\n\n-> Common Patterns:\n"
        for x in commonPatternsList:
            data += str(x) + "\n"
        data += "\n-> Specific patterns to " + str(sampleNameList1) + ":\n"
        for x in specificPatternsList1:
            data += str(x) + "\n"
        data += "\n-> Specific patterns to " + str(sampleNameList2) + ":\n"
        for x in specificPatternsList2:
            data += str(x) + "\n"
        data += "\nEND OF FILE ****"
        writeFile(data,"","text")
    elif not (answer == "N"):
        print "/!\ You should answer 'Y' or 'N'!"