def userNodeSelectionAct(dataArray):
    """Classify the samples with a group of nodes chosen interactively.

    Asks the user for a metadatum, a group of nodes and a training-set size,
    runs classifyIt/countYouden/interpretIt on them, then offers to plot the
    resulting classes as pie charts and to save Youden's J statistic.

    dataArray -- indexable structure; only these slots are read here:
        dataArray[0]: its length is taken as the number of samples
        dataArray[1]: printed as the list of available metadata
        dataArray[3]: mapping whose values are the candidate nodes

    Returns the pair (assignedClasses, youdenJ).
    Raises ValueError when the training-set size is not an integer lower
    than or equal to the number of samples.

    NOTE(review): a second definition of userNodeSelectionAct appears later
    in this file and replaces this one when the module is loaded.
    """
    print(dataArray[1])
    metadatum = sanitize(raw_input(
        "Input the metadatum that will cluster the set of samples among those written above. [ e.g. "
        + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum], dataArray[1])

    listnodes = list(dataArray[3].values())
    # The prompt shows three example nodes; with fewer than three nodes the
    # fixed indices -3 and 1 would raise IndexError, so show what exists.
    if len(listnodes) >= 3:
        exampleNodes = [listnodes[-3], listnodes[1], listnodes[-1]]
    else:
        exampleNodes = listnodes
    nodesExample = ";".join([sanitizeNode(node) for node in exampleNodes])
    nodesList = parseListNode(raw_input(
        "Choose the group of nodes you want to consider exclusively. [ Read the taxonomic tree to help you: e.g. "
        + nodesExample + " ]\n"))
    isInDatabase(nodesList, listnodes)

    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(raw_input(
        "Knowing there is/are " + str(numberSamples)
        + " sample(s), how many samples do you want to create the training set?\n"))
    if not integer.match(numberStartingSamples) or int(numberStartingSamples) > numberSamples:
        # Joined with single spaces to reproduce the spacing of the original
        # comma-separated print statement.
        print(" ".join([
            "\n/!\\ ERROR: You should write down an integer inferior or equal to ",
            str(numberSamples),
            "."]))
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)

    assignedClasses, classes, valueSet = classifyIt(
        dataArray, metadatum, nodesList, numberStartingSamples)
    youdenJ = countYouden(assignedClasses, classes, numberStartingSamples)
    interpretIt(youdenJ)

    answer = raw_input("Do you want to plot the classes obtained as a pie chart? Y/N")
    if answer == "Y":
        labels = [metadatum + " = " + str(value) for value in valueSet]
        percentagesAs = [len(class1) for class1 in assignedClasses]
        percentages = [len(class1) for class1 in classes]
        plotPieChart(
            labels, percentagesAs,
            "Assignments depending on " + str(nodesList) + " to class for metadatum " + metadatum)
        plotPieChart(
            labels, percentages,
            "Real classes depending on " + str(nodesList) + " for metadatum " + metadatum)
    elif answer != "N":
        # Any other answer is reported but treated as "no".
        print("\n Answer by Y or N!")

    answer = raw_input("Do you want to save the results? Y/N")
    if answer == "Y":
        writeFile(
            "Youden's J statistic for this classification is: " + str(youdenJ) + "\n",
            "Assignments depending on " + listNodes(nodesList) + " to classes for metadatum " + metadatum)
    elif answer != "N":
        print("\n Answer by Y or N!")
    return assignedClasses, youdenJ
def userNodeSelectionAct(dataArray):
    """Classify the samples with a group of nodes chosen interactively.

    Asks the user for a metadatum, a group of nodes and a training-set size,
    runs classifyIt/countYouden/interpretIt on them, then offers to plot the
    resulting classes as pie charts and to save Youden's J statistic.

    dataArray -- indexable structure; only these slots are read here:
        dataArray[0]: its length is taken as the number of samples
        dataArray[1]: printed as the list of available metadata
        dataArray[3]: mapping whose values are the candidate nodes

    Returns the pair (assignedClasses, youdenJ).
    Raises ValueError when the training-set size is not an integer lower
    than or equal to the number of samples.

    NOTE(review): this redefines a function of the same name that appears
    earlier in this file; this later definition is the one in effect.
    """
    print(dataArray[1])
    metadatum = sanitize(raw_input(
        "Input the metadatum that will cluster the set of samples among those written above. [ e.g. "
        + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum], dataArray[1])

    listnodes = list(dataArray[3].values())
    # The prompt shows three example nodes; with fewer than three nodes the
    # fixed indices -3 and 1 would raise IndexError, so show what exists.
    if len(listnodes) >= 3:
        exampleNodes = [listnodes[-3], listnodes[1], listnodes[-1]]
    else:
        exampleNodes = listnodes
    nodesExample = ";".join([sanitizeNode(node) for node in exampleNodes])
    nodesList = parseListNode(raw_input(
        "Choose the group of nodes you want to consider exclusively. [ Read the taxonomic tree to help you: e.g. "
        + nodesExample + " ]\n"))
    isInDatabase(nodesList, listnodes)

    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(raw_input(
        "Knowing there is/are " + str(numberSamples)
        + " sample(s), how many samples do you want to create the training set?\n"))
    if not integer.match(numberStartingSamples) or int(numberStartingSamples) > numberSamples:
        # Joined with single spaces to reproduce the spacing of the original
        # comma-separated print statement.
        print(" ".join([
            "\n/!\\ ERROR: You should write down an integer inferior or equal to ",
            str(numberSamples),
            "."]))
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)

    assignedClasses, classes, valueSet = classifyIt(
        dataArray, metadatum, nodesList, numberStartingSamples)
    youdenJ = countYouden(assignedClasses, classes, numberStartingSamples)
    interpretIt(youdenJ)

    answer = raw_input("Do you want to plot the classes obtained as a pie chart? Y/N")
    if answer == "Y":
        labels = [metadatum + " = " + str(value) for value in valueSet]
        percentagesAs = [len(class1) for class1 in assignedClasses]
        percentages = [len(class1) for class1 in classes]
        plotPieChart(
            labels, percentagesAs,
            "Assignments depending on " + str(nodesList) + " to class for metadatum " + metadatum)
        plotPieChart(
            labels, percentages,
            "Real classes depending on " + str(nodesList) + " for metadatum " + metadatum)
    elif answer != "N":
        # Any other answer is reported but treated as "no".
        print("\n Answer by Y or N!")

    answer = raw_input("Do you want to save the results? Y/N")
    if answer == "Y":
        writeFile(
            "Youden's J statistic for this classification is: " + str(youdenJ) + "\n",
            "Assignments depending on " + listNodes(nodesList) + " to classes for metadatum " + metadatum)
    elif answer != "N":
        print("\n Answer by Y or N!")
    return assignedClasses, youdenJ
def randomSubSamplingAct(dataArray):
    """Search, by random sub-sampling, the group of nodes that best classifies.

    Asks the user for a metadatum, a number s of random draws, a number n of
    nodes per draw and a training-set size. Each draw picks n nodes with
    randomChoice, classifies the samples with classifyIt and scores the
    result with countYouden; the draw minimising (number of classes -
    Youden's J) is retained. The user may then plot and/or save the result.

    dataArray -- indexable structure; only these slots are read here:
        dataArray[0]: its length is taken as the number of samples
        dataArray[1]: printed as the list of available metadata
        dataArray[3]: mapping whose values are the candidate nodes

    Returns (bestClassification, bestYouden, bestClassesList): the retained
    list of nodes, its score and the classes assigned with it.
    Raises ValueError when s or n is not an integer, when s is lower than 1,
    or when the training-set size is not an integer lower than or equal to
    the number of samples.

    NOTE(review): a second definition of randomSubSamplingAct appears later
    in this file and replaces this one when the module is loaded.
    """
    print(dataArray[1])
    metadatum = sanitize(raw_input(
        "Input the metadatum that will cluster the set of samples among those written above. [ e.g. "
        + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum], dataArray[1])

    s = raw_input("Input the number s of random samplings.\n")
    n = raw_input("Input the number n of nodes to select at each try.\n")
    if not integer.match(s) or not integer.match(n):
        print("\n/!\\ ERROR: s and n must both be integers.")
        raise ValueError
    s, n = int(s), int(n)
    # Without any draw there is no classification to report (the score would
    # be read from unbound variables), and a negative counter would never
    # reach zero in the sampling loop.
    if s < 1:
        print("\n/!\\ ERROR: s must be at least 1.")
        raise ValueError

    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(raw_input(
        "Knowing there is/are " + str(numberSamples)
        + " sample(s), how many samples do you want to create the training set? \n"))
    if not integer.match(numberStartingSamples) or int(numberStartingSamples) > numberSamples:
        print("\n/!\\ ERROR: You should write down an integer.")
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)

    listnodes = dataArray[3].values()
    # State of the best draw so far. currBestYouden holds
    # (number of classes - Youden's J), so lower is better and inf is the
    # worst possible starting value.
    bestClassification = []
    bestClassesList = []
    bestClasses = []
    bestValueSet = []
    currBestYouden = inf
    while s:
        # Randomly draw n distinct nodes among the nodes in the taxonomic tree
        # NOTE(review): behaviour of randomChoice when n exceeds the number
        # of nodes is not visible from here -- confirm.
        nodesList = randomChoice(listnodes, n)
        assignedClasses, classes, valueSet = classifyIt(
            dataArray, metadatum, nodesList, numberStartingSamples)
        numberClass = len(classes)
        youdenJ = countYouden(assignedClasses, classes, numberSamples)
        res = numberClass - youdenJ
        # "<=" keeps the original tie-break: on equal score the latest draw wins.
        if res <= currBestYouden:
            currBestYouden = res
            bestClassification = list(nodesList)
            bestClassesList = list(assignedClasses)
            # Kept so that the charts describe the retained draw rather than
            # whichever draw happened to come last.
            bestClasses = classes
            bestValueSet = valueSet
        s -= 1

    bestYouden = numberClass - currBestYouden
    interpretIt(bestYouden)

    answer = raw_input("Do you want to plot the classes obtained as a pie chart? Y/N")
    if answer == "Y":
        percentagesAs = [len(class1) for class1 in bestClassesList]
        labels = [metadatum + " = " + str(value) for value in bestValueSet]
        percentages = [len(class1) for class1 in bestClasses]
        plotPieChart(
            labels, percentagesAs,
            "Assignments depending on " + listNodes(bestClassification) + " to class for metadatum " + metadatum)
        plotPieChart(
            labels, percentages,
            "Real classes depending on " + listNodes(bestClassification) + " for metadatum " + metadatum)
    elif answer != "N":
        # Any other answer is reported but treated as "no".
        print("\n Answer by Y or N!")

    answer = raw_input("Do you want to save the results? Y/N")
    if answer == "Y":
        writeFile(
            "Best Youden's J statistic for this classification is: " + str(bestYouden)
            + "\nand most relevant list of nodes for this metadatum is:" + str(bestClassification),
            "Assignments to classes for metadatum " + metadatum)
    elif answer != "N":
        print("\n Answer by Y or N!")
    return bestClassification, bestYouden, bestClassesList
def plottingAct(dataArray):
    """Build the arrays requested by the user and draw the matching figure.

    creatingArray(dataArray) returns a sequence whose first item selects the
    kind of figure:
        "graph" -- (kind, xArray, yArray, typeInput, valuesInput1, valuesInput2),
                   drawn with plotGraph
        "pie"   -- (kind, result, nodesGroup, sampleNameList, metadataList),
                   drawn with plotPieChart

    Raises ValueError for any other kind.
    """
    creatingArrayOutput = creatingArray(dataArray)
    graphKind = creatingArrayOutput[0]
    if graphKind == "graph":
        # The two value lists must carry the names used in the title below;
        # unpacking them under different names made this branch fail with
        # NameError.
        _, xArray, yArray, typeInput, valuesInput1, valuesInput2 = creatingArrayOutput
        maxx, minix = getMaxMin(xArray)
        maxy, miniy = getMaxMin(yArray)
        if typeInput == "BB":
            xLabel = "Group of bacteria 1"
            yLabel = "Group of bacteria 2"
        # typeInput == "BM"
        else:
            xLabel = "Group of bacteria"
            yLabel = "Metadatum"
        # Axis bounds are widened by one unit on each side of the data range.
        plotGraph(
            xArray, yArray,
            xLabel=xLabel, yLabel=yLabel,
            maxx=maxx + 1, minx=minix - 1,
            maxy=maxy + 1, miny=miniy - 1,
            title="Plotting of type " + typeInput + " with values "
            + str(valuesInput1[:3]) + "... and " + str(valuesInput2[:3]) + "...")
    elif graphKind == "pie":
        _, result, nodesGroup, sampleNameList, metadataList = creatingArrayOutput
        plotPieChart(
            sampleNameList, result,
            "Assignments to the group of bacterias: " + str(nodesGroup) + " depending on samples")
    else:
        print("\n/!\\ ERROR: [BUG] [actions/plottingAct] Unknown type of graph.")
        raise ValueError
def diversityAct(dataArray):
    """Compute the microbial diversity coefficient of user-selected samples.

    The samples (and the optional metadata/interval selection) come from
    createSampleNameList; the coefficient comes from
    computeDiversityCoefficient. The result is printed, then the user may
    save it as a text report and/or display a pie chart of the assignments.

    dataArray -- indexable structure; dataArray[5] is the number of nodes in
    the taxonomic tree. The whole structure is also passed to the helpers.

    Returns None.
    """
    sampleNameList, metadata, interval1, interval2 = createSampleNameList(dataArray)
    # @dataArray[5] = n is the number of nodes in the taxonomic tree
    coefficient, sample = computeDiversityCoefficient(dataArray[5], sampleNameList, dataArray)
    print("\nMicrobial Diversity coefficient is: " + str(coefficient))
    print("[If you have obtained -inf, it could mean the taxonomic tree is actually empty.]\n")

    answer = raw_input("Save the results? Y/N\n")
    if answer == "Y":
        data = "Microbial Diversity Results ****\n for lists " + str(sampleNameList) + "\n"
        if metadata:
            # A separating space is needed between the metadata and the
            # following words, otherwise they run together in the report.
            data += ("selected on metadata: " + str(metadata)
                     + " with extreme values: " + str(interval1)
                     + " (lower bounds) and " + str(interval2) + " (upper bounds) \n")
        data += "\nMicrobial Diversity coefficient is: " + str(coefficient) + "\n\nEND OF FILE ****"
        writeFile(data, "", "text")
    elif answer != "N":
        # Any other answer is reported but treated as "no".
        print("/!\\ You should answer 'Y' or 'N'!")

    answer = raw_input("Do you want to display the pie chart of the assignments to the taxonomic tree in the selected samples? Y/N\n")
    if answer == "Y":
        # Each item of sample is indexed as a pair: item[0] feeds the first
        # list given to plotPieChart and item[1] the second.
        plotPieChart(
            [sample1[0] for sample1 in sample],
            [sample1[1] for sample1 in sample],
            "Assignments to the taxonomic tree in " + str(sampleNameList[:3]))
    elif answer != "N":
        print("/!\\ You should answer 'Y' or 'N'!")
def randomSubSamplingAct(dataArray):
    """Search, by random sub-sampling, the group of nodes that best classifies.

    Asks the user for a metadatum, a number s of random draws, a number n of
    nodes per draw and a training-set size. Each draw picks n nodes with
    randomChoice, classifies the samples with classifyIt and scores the
    result with countYouden; the draw minimising (number of classes -
    Youden's J) is retained. The user may then plot and/or save the result.

    dataArray -- indexable structure; only these slots are read here:
        dataArray[0]: its length is taken as the number of samples
        dataArray[1]: printed as the list of available metadata
        dataArray[3]: mapping whose values are the candidate nodes

    Returns (bestClassification, bestYouden, bestClassesList): the retained
    list of nodes, its score and the classes assigned with it.
    Raises ValueError when s or n is not an integer, when s is lower than 1,
    or when the training-set size is not an integer lower than or equal to
    the number of samples.

    NOTE(review): this redefines a function of the same name that appears
    earlier in this file; this later definition is the one in effect.
    """
    print(dataArray[1])
    metadatum = sanitize(raw_input(
        "Input the metadatum that will cluster the set of samples among those written above. [ e.g. "
        + dataArray[1][0] + " ]\n")).split(";")[0]
    isInDatabase([metadatum], dataArray[1])

    s = raw_input("Input the number s of random samplings.\n")
    n = raw_input("Input the number n of nodes to select at each try.\n")
    if not integer.match(s) or not integer.match(n):
        print("\n/!\\ ERROR: s and n must both be integers.")
        raise ValueError
    s, n = int(s), int(n)
    # Without any draw there is no classification to report (the score would
    # be read from unbound variables), and a negative counter would never
    # reach zero in the sampling loop.
    if s < 1:
        print("\n/!\\ ERROR: s must be at least 1.")
        raise ValueError

    numberSamples = len(dataArray[0])
    numberStartingSamples = sanitize(raw_input(
        "Knowing there is/are " + str(numberSamples)
        + " sample(s), how many samples do you want to create the training set? \n"))
    if not integer.match(numberStartingSamples) or int(numberStartingSamples) > numberSamples:
        print("\n/!\\ ERROR: You should write down an integer.")
        raise ValueError
    numberStartingSamples = int(numberStartingSamples)

    listnodes = dataArray[3].values()
    # State of the best draw so far. currBestYouden holds
    # (number of classes - Youden's J), so lower is better and inf is the
    # worst possible starting value.
    bestClassification = []
    bestClassesList = []
    bestClasses = []
    bestValueSet = []
    currBestYouden = inf
    while s:
        # Randomly draw n distinct nodes among the nodes in the taxonomic tree
        # NOTE(review): behaviour of randomChoice when n exceeds the number
        # of nodes is not visible from here -- confirm.
        nodesList = randomChoice(listnodes, n)
        assignedClasses, classes, valueSet = classifyIt(
            dataArray, metadatum, nodesList, numberStartingSamples)
        numberClass = len(classes)
        youdenJ = countYouden(assignedClasses, classes, numberSamples)
        res = numberClass - youdenJ
        # "<=" keeps the original tie-break: on equal score the latest draw wins.
        if res <= currBestYouden:
            currBestYouden = res
            bestClassification = list(nodesList)
            bestClassesList = list(assignedClasses)
            # Kept so that the charts describe the retained draw rather than
            # whichever draw happened to come last.
            bestClasses = classes
            bestValueSet = valueSet
        s -= 1

    bestYouden = numberClass - currBestYouden
    interpretIt(bestYouden)

    answer = raw_input("Do you want to plot the classes obtained as a pie chart? Y/N")
    if answer == "Y":
        percentagesAs = [len(class1) for class1 in bestClassesList]
        labels = [metadatum + " = " + str(value) for value in bestValueSet]
        percentages = [len(class1) for class1 in bestClasses]
        plotPieChart(
            labels, percentagesAs,
            "Assignments depending on " + listNodes(bestClassification) + " to class for metadatum " + metadatum)
        plotPieChart(
            labels, percentages,
            "Real classes depending on " + listNodes(bestClassification) + " for metadatum " + metadatum)
    elif answer != "N":
        # Any other answer is reported but treated as "no".
        print("\n Answer by Y or N!")

    answer = raw_input("Do you want to save the results? Y/N")
    if answer == "Y":
        writeFile(
            "Best Youden's J statistic for this classification is: " + str(bestYouden)
            + "\nand most relevant list of nodes for this metadatum is:" + str(bestClassification),
            "Assignments to classes for metadatum " + metadatum)
    elif answer != "N":
        print("\n Answer by Y or N!")
    return bestClassification, bestYouden, bestClassesList