def main(): tTree = raw_input("Write down the file name of the taxonomic tree in the folder \"meta\" [ without the extension .tree ]\n") if (tTree == ""): tTree = "GGdb2015" oMatrix = raw_input("Write down the CSV file name of the occurrence matrix in the folder \"meta\" [ without the extension .csv ]\n") if (oMatrix == ""): oMatrix = "MGAcount_complete" iMatrix = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") if (iMatrix == ""): iMatrix = "Info" print "/!\ Data getting parsed..." try: samplesInfoList,infoList = parseInfo(iMatrix) sampleIDList = getSampleIDList(samplesInfoList) except IOError: print "\nERROR: Maybe the filename",iMatrix,".csv does not exist in \"meta\" folder\n" s.exit(0) print "..." try: samplesOccList,speciesList = parseMatrix(oMatrix) except IOError: print "\nERROR: Maybe the filename",oMatrix,".csv does not exist in \"meta\" folder\n" s.exit(0) print "..." try: paths,n,nodesList = parseTree(tTree) except IOError: print "\nERROR: Maybe the filename",tTree,".tree does not exist in \"meta\" folder\n" s.exit(0) print "-- End of parsing\n" print "/!\ Constructing the whole annotated taxonomic tree" print "[ You may have to wait for a few seconds... ]" taxoTree = TaxoTree("Root").addNode(paths,nodesList,samplesOccList) print "-- End of construction\n" dataArray = [samplesInfoList,infoList,samplesOccList,speciesList,paths,n,nodesList,taxoTree,sampleIDList] answer = "" while not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): try: print "What do you want to do?" print "[Write down the number matching with the action required. 
Details are in README file]" print " 1: Run the program" print " 2: Print the taxonomic tree" print "[To quit, write down exit]" answer = raw_input("Your answer?\n") if (answer =="1"): runAct(dataArray) print "-- End \n" elif (answer == "2"): printTreeAct(dataArray) print "-- End \n" elif not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): print "/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit." raise ValueError except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n"
def main(): iMatrix = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") if (iMatrix == ""): iMatrix = "Info" fastaFileName = raw_input("Write down the MATCH file names in the folder \"meta/match\" [ without the extension .fasta ]\n") if (fastaFileName == ""): fastaFileName = "GREENGENES_gg16S_unaligned_10022015" print "/!\ Data getting parsed..." try: samplesInfoList,infoList = parseInfo(iMatrix) filenames = [sample[0] for sample in samplesInfoList] except IOError: print "\nERROR: Maybe the filename",iMatrix,".csv does not exist in \"meta\" folder.\n" s.exit(0) print "-- End of parsing\n" sb.call("ls ./meta/match > sampleidlist",shell=True) sampleidlist = sb.check_output("sed 's/.match//g' sampleidlist | sed 's/testfiles//g' | sed '/^$/d'",shell=True).split() sb.call("rm -f sampleidlist",shell=True) result = sb.check_output("ls ./meta/match/testfiles",shell=True) if not result: print "/!\ Pre-processing files for parsing..." process(sampleidlist) print "/!\ Pre-processing done." print "/!\ Constructing the features vectors..." sampleList = mergeList(sampleidlist,filenames) try: matchingNodes,idSequences,_,_ = featuresCreate(sampleList,fastaFileName) except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n" s.exit(0) print "-- End of construction\n" dataArray = [samplesInfoList,infoList,sampleList,idSequences,matchingNodes] answer = "" while not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): try: print "What do you want to do?" print "[Write down the number matching with the action required. 
Details are in README file]" print " 1: User node selection" print " 2: Random sub-sampling" print "[To quit, write down exit]" answer = raw_input("Your answer?\n") if (answer =="1"): userNodeSelectionAct(dataArray) print "-- End \n" elif (answer == "2"): randomSubSamplingAct(dataArray) print "-- End \n" elif not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): print "\n/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit." raise ValueError except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n"
def expectMatrix(matrix,n,m):
    """Return the sum of matrix[i][j]/(n*m) over the finite cells of matrix.

    Only rows 0..n-1 and columns 0..m-1 are read. A cell equal to the
    module-level ``inf`` is skipped, but the divisor stays n*m, so skipped
    cells still count in the denominator.

    Parameters:
        matrix: indexable as matrix[i][j].
        n: number of rows to read.
        m: number of columns to read.

    Returns:
        The accumulated value; the integer 0 when no finite cell was added.

    Raises:
        ValueError: when n or m is zero (or otherwise falsy), after printing
            an error line.
    """
    if not n or not m:
        print("\n/!\\ ERROR: Math problem (Division by zero).")
        raise ValueError
    # Float divisor: with integer cells, Python 2's "/" would floor every
    # term (usually down to 0) and the expectation would be wrong.
    cellCount = float(n*m)
    exp = 0
    for i in range(n):
        for j in range(m):
            try:
                # Values are expected to be non-negative; infinite ones are
                # left out of the expectation.
                # NOTE(review): the previous comment named -1 as the marker
                # for infinity (see computeDiscriminatoryDistance.py) while
                # the code compares against inf -- confirm which one applies.
                value = matrix[i][j]
                if not (value == inf):
                    exp += value/cellCount
            except IndexError:
                # Diagnostic dump for a matrix smaller than n x m; the loop
                # then carries on with the next cell.
                # NOTE(review): matrix[j] / sampleIDList[j] below can raise
                # IndexError themselves, which would propagate to the caller.
                print("BUG")
                print("%s %s" % (matrix[i], matrix[j]))
                from misc import getSampleIDList
                from parsingInfo import parseInfo
                sampleInfoList,_ = parseInfo("Info")
                sampleIDList = getSampleIDList(sampleInfoList)
                print("%s %s" % (sampleIDList[i], sampleIDList[j]))
                print("END BUG")
    return exp
def main(): iMatrix = raw_input( "Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n" ) if (iMatrix == ""): iMatrix = "Info" fastaFileName = raw_input( "Write down the MATCH file names in the folder \"meta/match\" [ without the extension .fasta ]\n" ) if (fastaFileName == ""): fastaFileName = "GREENGENES_gg16S_unaligned_10022015" print "/!\ Data getting parsed..." try: samplesInfoList, infoList = parseInfo(iMatrix) filenames = [sample[0] for sample in samplesInfoList] except IOError: print "\nERROR: Maybe the filename", iMatrix, ".csv does not exist in \"meta\" folder.\n" s.exit(0) print "-- End of parsing\n" sb.call("ls ./meta/match > sampleidlist", shell=True) sampleidlist = sb.check_output( "sed 's/.match//g' sampleidlist | sed 's/testfiles//g' | sed '/^$/d'", shell=True).split() sb.call("rm -f sampleidlist", shell=True) result = sb.check_output("ls ./meta/match/testfiles", shell=True) if not result: print "/!\ Pre-processing files for parsing..." process(sampleidlist) print "/!\ Pre-processing done." print "/!\ Constructing the features vectors..." sampleList = mergeList(sampleidlist, filenames) try: matchingNodes, idSequences, _, _ = featuresCreate( sampleList, fastaFileName) except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n" s.exit(0) print "-- End of construction\n" dataArray = [ samplesInfoList, infoList, sampleList, idSequences, matchingNodes ] answer = "" while not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): try: print "What do you want to do?" print "[Write down the number matching with the action required. 
Details are in README file]" print " 1: User node selection" print " 2: Random sub-sampling" print "[To quit, write down exit]" answer = raw_input("Your answer?\n") if (answer == "1"): userNodeSelectionAct(dataArray) print "-- End \n" elif (answer == "2"): randomSubSamplingAct(dataArray) print "-- End \n" elif not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): print "\n/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit." raise ValueError except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n"
def main(): tTree = raw_input("Write down the file name of the taxonomic tree in the folder \"meta\" [ without the extension .tree ]\n") if (tTree == ""): tTree = "GGdb2015" oMatrix = raw_input("Write down the CSV file name of the occurrence matrix in the folder \"meta\" [ without the extension .csv ]\n") if (oMatrix == ""): oMatrix = "MGAcount_complete" iMatrix = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") if (iMatrix == ""): iMatrix = "Info" print "/!\ Data getting parsed..." try: samplesInfoList,infoList = parseInfo(iMatrix) sampleIDList = getSampleIDList(samplesInfoList) except IOError: print "\nERROR: Maybe the filename",iMatrix,".csv does not exist in \"meta\" folder\n" s.exit(0) print "..." try: samplesOccList,speciesList = parseMatrix(oMatrix) except IOError: print "\nERROR: Maybe the filename",oMatrix,".csv does not exist in \"meta\" folder\n" s.exit(0) print "..." try: paths,n,nodesList = parseTree(tTree) except IOError: print "\nERROR: Maybe the filename",tTree,".tree does not exist in \"meta\" folder\n" s.exit(0) print "-- End of parsing\n" print "/!\ Constructing the whole annotated taxonomic tree" print "[ You may have to wait for a few seconds... ]" taxoTree = TaxoTree("Root").addNode(paths,nodesList,samplesOccList) print "-- End of construction\n" dataArray = [samplesInfoList,infoList,samplesOccList,speciesList,paths,n,nodesList,taxoTree,sampleIDList] answer = "" while not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): try: print "What do you want to do?" print "[Write down the number matching with the action required. 
Details are in README file]" print " 1: Total ratio and Diff ratio" print " 2: Pattern ratio" print " 3: Percentage of assignments in a certain group of bacterias depending on metadata" print " 4: Pearson correlation coefficient" print " 5: Microbial diversity in samples" print " 6: Similarity coefficients between patients" print " 7: Print the taxonomic tree" print " 8: Plot a graph, or a pie chart" print " 9: Compute total distance between two samples" print "[To quit, write down exit]" answer = raw_input("Your answer?\n") if (answer =="1"): totalDiffRatioAct(dataArray) print "-- End \n" elif (answer == "2"): patternRatioAct(dataArray) print "-- End \n" elif (answer == "3"): percentageAct(dataArray) print "-- End \n" elif (answer == "4"): pearsonAct(dataArray) print "-- End \n" elif (answer == "5"): diversityAct(dataArray) print "-- End\n" elif (answer == "6"): matrixSim = similarityAct(dataArray,iMatrix) dataArray.append(matrixSim) print "-- End \n" elif (answer == "7"): printTreeAct(dataArray) print "-- End \n" elif (answer == "8"): plottingAct(dataArray) print "-- End \n" elif (answer == "9"): distanceAct(dataArray) print "-- End \n" elif not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): print "\n/!\ ERROR: Please enter a number between 1 and 9 included, or 'exit' if you want to quit." raise ValueError except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n"
def main(): iMatrix = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") if (iMatrix == ""): iMatrix = "Info" fastaFileName = raw_input("Write down the FASTA file name in the folder \"meta\" [ without the extension .fasta ]\n") if (fastaFileName == ""): fastaFileName = "GREENGENES_gg16S_unaligned_10022015" print "/!\ Data getting parsed..." try: samplesInfoList,infoList = parseInfo(iMatrix) filenames = [sample[0] for sample in samplesInfoList] except IOError: print "\nERROR: Maybe the filename",iMatrix,".csv does not exist in \"meta\" folder.\n" s.exit(0) print "-- End of parsing\n" sb.call("ls ./meta/match > sampleidlist",shell=True) sampleidlist = sb.check_output("sed 's/.match//g' sampleidlist | sed 's/testfiles//g' | sed '/^$/d'",shell=True).split() sb.call("rm -f sampleidlist",shell=True) result = sb.check_output("ls ./meta/match/testfiles",shell=True) if not result: print "/!\ Pre-processing files for parsing..." process(sampleidlist) print "/!\ Pre-processing done." print "/!\ Constructing the features vectors..." sampleList = mergeList(sampleidlist,filenames) try: matchingNodes,idSequences,paths,nodesListTree = featuresCreate(sampleList,fastaFileName) except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n" s.exit(0) print "-- End of construction" print "/!\ Constructing the whole taxonomic tree..." print "[ You may have to wait a few seconds... ]" taxoTree = TaxoTree("Root").addNode(paths,nodesListTree) print "-- End of construction\n" dataArray = [samplesInfoList,infoList,idSequences,sampleList,matchingNodes,paths,nodesListTree,taxoTree] filesList = sb.check_output("ls ./meta | awk '/.dist/'",shell=True).split() if not filesList: print "/!\ Computing distance matrix... 
(1/2)" start = time() computeDistanceMatrix(distMatched,dataArray) end = time() print "TIME:",end-start,"sec" print "/!\ Computing distance matrix... (2/2)" start = time() computeDistanceMatrix(distConsensus,dataArray) end = time() print "TIME:",end-start,"sec" print "-- End of computation." answer = None done = False while not done and not (answer == "exit" or answer == "exit()" or answer == "quit" or answer == "quit()"): try: answer = raw_input("Do you want to compute distance matrix for another value of q or import pre-computed matrices? compute/import\n") if answer == "compute": print "/!\ Computing distance matrix..." start = time() computeDistanceMatrix(distConsensus,dataArray) end = time() print "TIME:",end-start,"sec" print "-- End of computation." done = True elif not (answer == "import"): print "\n/!\ You should answer by 'compute' or 'import'!" raise ValueError done = True undone = True while undone: qList = sorted(sb.check_output("ls ./meta | awk '/.dist/' | sed 's/matrix[1-2]//g' | sed 's/.dist//g'",shell=True).split()) print "List of pre-computed q:",qList q = raw_input("Choose q among the ones above.\n") if float(q) < 0 or float(q) > 1 or not (q in qList): print "\n/!\ ERROR: Wrong value of q [should be between 0 and 1, and be already computed]:",q,".\n" continue else: distMatchedDict = importMatrixToDict("matrix1",dataArray) distConsensusDict = importMatrixToDict("matrix2" + q,dataArray) undone = False dataArray.append(distMatchedDict) dataArray.append(distConsensusDict) print "/!\ Matrices imported." except ValueError: continue answer = "" while not ((answer == "exit") or (answer == "exit()") or (answer == "quit") or (answer == "quit()")): try: print "What do you want to do?" print "[Write down the number matching with the action required. 
Details are in README file]" print " 1: Clustering" print " 2: Print the taxonomic tree" print "[To quit, write down exit]" answer = raw_input("Your answer?\n") if (answer =="1"): clusteringAct(dataArray) print "-- End \n" elif (answer == "2"): printTreeAct(dataArray) print "-- End \n" elif not ((answer == "exit") or (answer == "exit()") or (answer == "quit")): print "\n/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit." raise ValueError except ValueError: print "/!\ ERROR: Please look at the line above." print "/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n"