def userNodeSelectionAct(dataArray):
    print dataArray[1]
    metadatum = sanitize(raw_input("Input the metadatum that will cluster the set of samples among those written above. [ e.g. " + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum],dataArray[1])
    listnodes = dataArray[3].values()
    nodesList = parseListNode(raw_input("Choose the group of nodes you want to consider exclusively. [ Read the taxonomic tree to help you: e.g. " + sanitizeNode(listnodes[-3]) + ";" + sanitizeNode(listnodes[1]) + ";" + sanitizeNode(listnodes[-1]) + " ]\n"))
    isInDatabase(nodesList,listnodes)
    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(raw_input("Knowing there is/are " + str(numberSamples) + " sample(s), how many samples do you want to create the training set?\n"))
    x = integer.match(numberStartingSamples)
    if not x or (x and int(numberStartingSamples) > numberSamples) :
        print "\n/!\ ERROR: You should write down an integer inferior or equal to ",numberSamples,"."
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)
    assignedClasses,classes,valueSet = classifyIt(dataArray,metadatum,nodesList,numberStartingSamples)
    numberClass = len(classes)
    youdenJ = countYouden(assignedClasses,classes,numberStartingSamples)
    interpretIt(youdenJ)
    answer = raw_input("Do you want to plot the classes obtained as a pie chart? Y/N")
    if answer == "Y":
        labels = [ metadatum + " = " + str(value) for value in valueSet ]
        percentagesAs = [ len(class1) for class1 in assignedClasses ]
        percentages = [ len(class1) for class1 in classes ]
        plotPieChart(labels,percentagesAs,"Assignments depending on " + str(nodesList) + " to class for metadatum " + metadatum)
        plotPieChart(labels,percentages,"Real classes depending on " + str(nodesList) + " for metadatum " + metadatum)
    elif not (answer == "N"):
        print "\n Answer by Y or N!"
    answer = raw_input("Do you want to save the results? Y/N")
    if (answer == "Y"):
        writeFile("Youden's J statistic for this classification is: " + str(youdenJ) + "\n","Assignments depending on " + listNodes(nodesList) + " to classes for metadatum " + metadatum)
    elif not (answer == "N"):
        print "\n Answer by Y or N!"
    return assignedClasses,youdenJ
def userNodeSelectionAct(dataArray):
    print dataArray[1]
    metadatum = sanitize(
        raw_input(
            "Input the metadatum that will cluster the set of samples among those written above. [ e.g. "
            + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum], dataArray[1])
    listnodes = dataArray[3].values()
    nodesList = parseListNode(
        raw_input(
            "Choose the group of nodes you want to consider exclusively. [ Read the taxonomic tree to help you: e.g. "
            + sanitizeNode(listnodes[-3]) + ";" + sanitizeNode(listnodes[1]) +
            ";" + sanitizeNode(listnodes[-1]) + " ]\n"))
    isInDatabase(nodesList, listnodes)
    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(
        raw_input(
            "Knowing there is/are " + str(numberSamples) +
            " sample(s), how many samples do you want to create the training set?\n"
        ))
    x = integer.match(numberStartingSamples)
    if not x or (x and int(numberStartingSamples) > numberSamples):
        print "\n/!\ ERROR: You should write down an integer inferior or equal to ", numberSamples, "."
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)
    assignedClasses, classes, valueSet = classifyIt(dataArray, metadatum,
                                                    nodesList,
                                                    numberStartingSamples)
    numberClass = len(classes)
    youdenJ = countYouden(assignedClasses, classes, numberStartingSamples)
    interpretIt(youdenJ)
    answer = raw_input(
        "Do you want to plot the classes obtained as a pie chart? Y/N")
    if answer == "Y":
        labels = [metadatum + " = " + str(value) for value in valueSet]
        percentagesAs = [len(class1) for class1 in assignedClasses]
        percentages = [len(class1) for class1 in classes]
        plotPieChart(
            labels, percentagesAs, "Assignments depending on " +
            str(nodesList) + " to class for metadatum " + metadatum)
        plotPieChart(
            labels, percentages, "Real classes depending on " +
            str(nodesList) + " for metadatum " + metadatum)
    elif not (answer == "N"):
        print "\n Answer by Y or N!"
    answer = raw_input("Do you want to save the results? Y/N")
    if (answer == "Y"):
        writeFile(
            "Youden's J statistic for this classification is: " +
            str(youdenJ) + "\n", "Assignments depending on " +
            listNodes(nodesList) + " to classes for metadatum " + metadatum)
    elif not (answer == "N"):
        print "\n Answer by Y or N!"
    return assignedClasses, youdenJ
def randomSubSamplingAct(dataArray):
    print dataArray[1]
    metadatum = sanitize(raw_input("Input the metadatum that will cluster the set of samples among those written above. [ e.g. " + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum],dataArray[1])
    s = raw_input("Input the number s of random samplings.\n")
    n = raw_input("Input the number n of nodes to select at each try.\n")
    if not integer.match(s) or not integer.match(n):
        print "\n/!\ ERROR: s and n must both be integers."
        raise ValueError
    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(raw_input("Knowing there is/are " + str(numberSamples) + "sample(s), how many samples do you want to create the training set? \n"))
    x = integer.match(numberStartingSamples)
    if not x or (x and int(numberStartingSamples) > numberSamples):
        print "\n/!\ ERROR: You should write down an integer."
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)
    listnodes = dataArray[3].values()
    s,n = int(s),int(n)
    #Here the set of classes is a list of two lists containing the samples in C and not C
    bestClassification = []
    bestClassesList = []
    #Worse value for this coefficient
    currBestYouden = inf
    nodesNumber = len(dataArray[3])
    while s:
        #Randomly draw n distinct nodes among the nodes in the taxonomic tree
        nodesList = randomChoice(listnodes,n)
        assignedClasses,classes,valueSet = classifyIt(dataArray,metadatum,nodesList,numberStartingSamples)
        numberClass = len(classes)
        youdenJ = countYouden(assignedClasses,classes,numberSamples)
        res = numberClass - youdenJ
        if min(res,currBestYouden) == res:
            bestClassification = []
            for i in nodesList:
                bestClassification.append(i)
            currBestYouden = res
            bestClassesList = []
            for i in assignedClasses:
                bestClassesList.append(i)
        s -= 1
    interpretIt(numberClass - currBestYouden)
    if answer == "Y":
        percentagesAs = [ len(class1) for class1 in assignedClasses ]
        labels = [ metadatum + " = " + str(value) for value in valueSet ]
        percentages = [ len(class1) for class1 in classes ]
        plotPieChart(labels,percentagesAs,"Assignments depending on " + listNodes(nodesList) + " to class for metadatum " + metadatum)
        plotPieChart(labels,percentages,"Real classes depending on " + listNodes(nodesList) + " for metadatum " + metadatum)
    answer = raw_input("Do you want to save the results? Y/N")
    if (answer == "Y"):
        writeFile("Best Youden's J statistic for this classification is: " + str(numberClass - currBestYouden) + "\nand most relevant list of nodes for this metadatum is:" + str(bestClassification),"Assignments to classes for metadatum " + metadatum)
    elif not (answer == "N"):
        print "\n Answer by Y or N!"
    return bestClassification,(numberClass - currBestYouden),bestClassesList
Example #4
0
def plottingAct(dataArray):
    """Plot what creatingArray builds from dataArray, as a graph or a pie chart.

    The first item of creatingArray's output selects the plot:
      * "graph": the output also carries the x and y arrays, a type code
        ("BB" labels both axes as groups of bacteria, anything else labels
        them as a group of bacteria against a metadatum) and two value
        lists whose first three items appear in the title. The axes extend
        one unit beyond the values returned by getMaxMin.
      * "pie": the output also carries the values to plot, the group of
        nodes and the sample names used as labels.

    Raises ValueError for any other first item.
    """
    creatingArrayOutput = creatingArray(dataArray)
    graphKind = creatingArrayOutput[0]
    if graphKind == "graph":
        # The title below reads valuesInput1/valuesInput2, so the unpacking
        # must bind exactly those names.
        _, xArray, yArray, typeInput, valuesInput1, valuesInput2 = creatingArrayOutput
        maxx, minix = getMaxMin(xArray)
        maxy, miniy = getMaxMin(yArray)
        if typeInput == "BB":
            xLabel = "Group of bacteria 1"
            yLabel = "Group of bacteria 2"
        # typeInput == "BM"
        else:
            xLabel = "Group of bacteria"
            yLabel = "Metadatum"
        title = ("Plotting of type " + typeInput + " with values "
                 + str(valuesInput1[:3]) + "...  and "
                 + str(valuesInput2[:3]) + "...")
        plotGraph(xArray, yArray, xLabel=xLabel, yLabel=yLabel,
                  maxx=maxx + 1, minx=minix - 1,
                  maxy=maxy + 1, miny=miniy - 1,
                  title=title)
    elif graphKind == "pie":
        # The last item (a metadata list) is not needed for the chart.
        _, result, nodesGroup, sampleNameList, _ = creatingArrayOutput
        plotPieChart(sampleNameList, result, "Assignments to the group of bacterias: " + str(nodesGroup) + " depending on samples")
    else:
        print "\n/!\ ERROR: [BUG] [actions/plottingAct] Unknown type of graph."
        raise ValueError
Example #5
0
def diversityAct(dataArray):
    sampleNameList,metadata,interval1,interval2 = createSampleNameList(dataArray)
    #@dataArray[5] = n is the number of nodes in the taxonomic tree
    coefficient,sample = computeDiversityCoefficient(dataArray[5],sampleNameList,dataArray)
    print "\nMicrobial Diversity coefficient is: " + str(coefficient)
    print "[If you have obtained -inf, it could mean the taxonomic tree is actually empty.]\n"
    answer = raw_input("Save the results? Y/N\n")
    if (answer == "Y"):
        data = "Microbial Diversity Results ****\n for lists " + str(sampleNameList) + "\n"
        if metadata:
            data += "selected on metadata: " + str(metadata) + "with extreme values: " + str(interval1) + " (lower bounds) and " + str(interval2) + " (upper bounds) \n"
        data += "\nMicrobial Diversity coefficient is: " + str(coefficient) +"\n\nEND OF FILE ****"  
        writeFile(data,"","text")
    elif not (answer == "N"):
        print "/!\ You should answer 'Y' or 'N'!"
    answer = raw_input("Do you want to display the pie chart of the assignments to the taxonomic tree in the selected samples? Y/N\n")
    if (answer == "Y"):
        plotPieChart([sample1[0] for sample1 in sample],[sample1[1] for sample1 in sample],"Assignments to the taxonomic tree in " + str(sampleNameList[:3]))
    elif not (answer == "N"):
        print "/!\ You should answer 'Y' or 'N'!"
def randomSubSamplingAct(dataArray):
    print dataArray[1]
    metadatum = sanitize(
        raw_input(
            "Input the metadatum that will cluster the set of samples among those written above. [ e.g. "
            + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum], dataArray[1])
    s = raw_input("Input the number s of random samplings.\n")
    n = raw_input("Input the number n of nodes to select at each try.\n")
    if not integer.match(s) or not integer.match(n):
        print "\n/!\ ERROR: s and n must both be integers."
        raise ValueError
    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(
        raw_input(
            "Knowing there is/are " + str(numberSamples) +
            "sample(s), how many samples do you want to create the training set? \n"
        ))
    x = integer.match(numberStartingSamples)
    if not x or (x and int(numberStartingSamples) > numberSamples):
        print "\n/!\ ERROR: You should write down an integer."
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)
    listnodes = dataArray[3].values()
    s, n = int(s), int(n)
    #Here the set of classes is a list of two lists containing the samples in C and not C
    bestClassification = []
    bestClassesList = []
    #Worse value for this coefficient
    currBestYouden = inf
    nodesNumber = len(dataArray[3])
    while s:
        #Randomly draw n distinct nodes among the nodes in the taxonomic tree
        nodesList = randomChoice(listnodes, n)
        assignedClasses, classes, valueSet = classifyIt(
            dataArray, metadatum, nodesList, numberStartingSamples)
        numberClass = len(classes)
        youdenJ = countYouden(assignedClasses, classes, numberSamples)
        res = numberClass - youdenJ
        if min(res, currBestYouden) == res:
            bestClassification = []
            for i in nodesList:
                bestClassification.append(i)
            currBestYouden = res
            bestClassesList = []
            for i in assignedClasses:
                bestClassesList.append(i)
        s -= 1
    interpretIt(numberClass - currBestYouden)
    if answer == "Y":
        percentagesAs = [len(class1) for class1 in assignedClasses]
        labels = [metadatum + " = " + str(value) for value in valueSet]
        percentages = [len(class1) for class1 in classes]
        plotPieChart(
            labels, percentagesAs, "Assignments depending on " +
            listNodes(nodesList) + " to class for metadatum " + metadatum)
        plotPieChart(
            labels, percentages, "Real classes depending on " +
            listNodes(nodesList) + " for metadatum " + metadatum)
    answer = raw_input("Do you want to save the results? Y/N")
    if (answer == "Y"):
        writeFile(
            "Best Youden's J statistic for this classification is: " +
            str(numberClass - currBestYouden) +
            "\nand most relevant list of nodes for this metadatum is:" +
            str(bestClassification),
            "Assignments to classes for metadatum " + metadatum)
    elif not (answer == "N"):
        print "\n Answer by Y or N!"
    return bestClassification, (numberClass - currBestYouden), bestClassesList