예제 #1
0
def main():
    """Parse the three input files, build the taxonomic tree and serve the menu.

    The user is asked for the taxonomic tree, occurrence matrix and data
    matrix file names; an empty answer selects the default name. Each file
    is parsed in turn (data matrix, occurrence matrix, tree). An IOError
    during parsing is reported and followed by s.exit(0). Once the tree is
    built, a two-entry menu is shown until the user types "exit", "exit()"
    or "quit". A ValueError raised while handling a menu choice is reported
    and the menu is shown again.
    """
    # Single-argument print(...) is the same statement under Python 2.
    # An empty answer falls back to the default file name.
    treeName = raw_input("Write down the file name of the taxonomic tree in the folder \"meta\" [ without the extension .tree ]\n") or "GGdb2015"
    occurrenceName = raw_input("Write down the CSV file name of the occurrence matrix in the folder \"meta\" [ without the extension .csv ]\n") or "MGAcount_complete"
    infoName = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") or "Info"

    print("/!\ Data getting parsed...")
    try:
        samplesInfoList, infoList = parseInfo(infoName)
        sampleIDList = getSampleIDList(samplesInfoList)
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", infoName, ".csv does not exist in \"meta\" folder\n"]))
        s.exit(0)
    print("...")
    try:
        samplesOccList, speciesList = parseMatrix(occurrenceName)
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", occurrenceName, ".csv does not exist in \"meta\" folder\n"]))
        s.exit(0)
    print("...")
    try:
        paths, n, nodesList = parseTree(treeName)
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", treeName, ".tree does not exist in \"meta\" folder\n"]))
        s.exit(0)
    print("-- End of parsing\n")

    print("/!\ Constructing the whole annotated taxonomic tree")
    print("[ You may have to wait for a few seconds... ]")
    taxoTree = TaxoTree("Root").addNode(paths, nodesList, samplesOccList)
    print("-- End of construction\n")

    # Positional bundle handed to every action.
    dataArray = [
        samplesInfoList, infoList, samplesOccList, speciesList,
        paths, n, nodesList, taxoTree, sampleIDList,
    ]

    quitWords = ("exit", "exit()", "quit")
    menuLines = (
        "What do you want to do?",
        "[Write down the number matching with the action required. Details are in README file]",
        "   1: Run the program",
        "   2: Print the taxonomic tree",
        "[To quit, write down exit]",
    )
    answer = ""
    while answer not in quitWords:
        try:
            for menuLine in menuLines:
                print(menuLine)
            answer = raw_input("Your answer?\n")
            if answer in quitWords:
                # The loop condition ends the session.
                continue
            if answer == "1":
                runAct(dataArray)
            elif answer == "2":
                printTreeAct(dataArray)
            else:
                print("/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit.")
                raise ValueError
            print("-- End \n")
        except ValueError:
            # Reached for an unknown choice and for a ValueError raised by an action.
            print("/!\ ERROR: Please look at the line above.")
            print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
예제 #2
0
def main():
    """Parse the data matrix, build the feature vectors and serve the menu.

    The user is asked for the data matrix and FASTA file names; an empty
    answer selects the default name. The sample identifiers are read from
    the listing of ./meta/match, the match files are pre-processed when
    the listing of ./meta/match/testfiles is empty, and the feature vectors
    are created. An IOError while parsing the data matrix, or a ValueError
    while creating the features, is reported and followed by s.exit(0).
    A two-entry menu is then shown until the user types "exit", "exit()"
    or "quit".
    """
    # Single-argument print(...) is the same statement under Python 2.
    # An empty answer falls back to the default file name.
    infoName = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") or "Info"
    fastaFileName = raw_input("Write down the MATCH file names in the folder \"meta/match\" [ without the extension .fasta ]\n") or "GREENGENES_gg16S_unaligned_10022015"

    print("/!\ Data getting parsed...")
    try:
        samplesInfoList, infoList = parseInfo(infoName)
        # First column of every metadata row.
        infoFileNames = [row[0] for row in samplesInfoList]
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", infoName, ".csv does not exist in \"meta\" folder.\n"]))
        s.exit(0)
    print("-- End of parsing\n")

    # The listing goes through a temporary file named "sampleidlist",
    # which is removed once it has been read.
    sb.call("ls ./meta/match > sampleidlist", shell=True)
    cleanedListing = sb.check_output("sed 's/.match//g' sampleidlist | sed 's/testfiles//g' | sed '/^$/d'", shell=True)
    matchIDs = cleanedListing.split()
    sb.call("rm -f sampleidlist", shell=True)

    testfilesListing = sb.check_output("ls ./meta/match/testfiles", shell=True)
    if not testfilesListing:
        # Empty listing: the match files have not been pre-processed yet.
        print("/!\ Pre-processing files for parsing...")
        process(matchIDs)
        print("/!\ Pre-processing done.")

    print("/!\ Constructing the features vectors...")
    sampleList = mergeList(matchIDs, infoFileNames)
    try:
        # Only the first two of the four returned values are used here.
        matchingNodes, idSequences, _, _ = featuresCreate(sampleList, fastaFileName)
    except ValueError:
        print("/!\ ERROR: Please look at the line above.")
        print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
        s.exit(0)
    print("-- End of construction\n")

    # Positional bundle handed to every action.
    dataArray = [samplesInfoList, infoList, sampleList, idSequences, matchingNodes]

    quitWords = ("exit", "exit()", "quit")
    menuLines = (
        "What do you want to do?",
        "[Write down the number matching with the action required. Details are in README file]",
        "   1: User node selection",
        "   2: Random sub-sampling",
        "[To quit, write down exit]",
    )
    answer = ""
    while answer not in quitWords:
        try:
            for menuLine in menuLines:
                print(menuLine)
            answer = raw_input("Your answer?\n")
            if answer in quitWords:
                # The loop condition ends the session.
                continue
            if answer == "1":
                userNodeSelectionAct(dataArray)
            elif answer == "2":
                randomSubSamplingAct(dataArray)
            else:
                print("\n/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit.")
                raise ValueError
            print("-- End \n")
        except ValueError:
            # Reached for an unknown choice and for a ValueError raised by an action.
            print("/!\ ERROR: Please look at the line above.")
            print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
예제 #3
0
def expectMatrix(matrix,n,m):
    """Return the mean of the cells matrix[i][j] for i < n and j < m.

    Cells equal to `inf` are left out of the sum but are still counted in
    the divisor n*m.

    Parameters:
        matrix: indexable as matrix[i][j].
        n: number of rows to visit.
        m: number of columns to visit.

    Returns:
        The sum of the non-infinite visited cells divided by n*m.

    Raises:
        ValueError: when n or m is zero (the divisor would be zero); a
            message is printed first.
    """
    if not n or not m:
        print("\n/!\ ERROR: Math problem (Division by zero).")
        raise ValueError
    # Float divisor, computed once: with integer cells the Python 2 "/"
    # operator floors each term, which truncated the expectation.
    cellCount = float(n*m)
    exp = 0
    for i in range(n):
        for j in range(m):
            try:
                # All values are theoretically non-negative.
                # Infinite values are ignored in the computation of the
                # expectation.
                # NOTE(review): the original comment says -1 marks an
                # infinite value (see computeDiscriminatoryDistance.py),
                # but only `inf` is tested here -- confirm which is right.
                if not (matrix[i][j] == inf):
                    exp += matrix[i][j]/cellCount
            except IndexError:
                # Diagnostic dump for a cell outside the matrix; the cell
                # is skipped and the loop goes on.
                print("BUG")
                print(" ".join([str(matrix[i]), str(matrix[j])]))
                from misc import getSampleIDList
                from parsingInfo import parseInfo
                sampleInfoList,_ = parseInfo("Info")
                sampleIDList = getSampleIDList(sampleInfoList)
                print(" ".join([str(sampleIDList[i]), str(sampleIDList[j])]))
                print("END BUG")
    return exp
예제 #4
0
def main():
    """Interactive session for user node selection and random sub-sampling.

    Steps, in order: ask for the data matrix and FASTA file names (an empty
    answer selects the default), parse the data matrix, read the sample
    identifiers from the listing of ./meta/match, pre-process the match
    files when the listing of ./meta/match/testfiles is empty, create the
    feature vectors, then show a two-entry menu until the user types
    "exit", "exit()" or "quit".

    An IOError while parsing the data matrix, or a ValueError while
    creating the features, is reported and followed by s.exit(0).
    """
    # Single-argument print(...) is the same statement under Python 2.
    dataMatrixName = raw_input(
        "Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n"
    )
    if not dataMatrixName:
        dataMatrixName = "Info"
    fastaFileName = raw_input(
        "Write down the MATCH file names in the folder \"meta/match\" [ without the extension .fasta ]\n"
    )
    if not fastaFileName:
        fastaFileName = "GREENGENES_gg16S_unaligned_10022015"

    print("/!\ Data getting parsed...")
    try:
        samplesInfoList, infoList = parseInfo(dataMatrixName)
        # Collect the first column of every metadata row.
        firstColumn = []
        for infoRow in samplesInfoList:
            firstColumn.append(infoRow[0])
    except IOError:
        missingFileMessage = " ".join([
            "\nERROR: Maybe the filename",
            dataMatrixName,
            ".csv does not exist in \"meta\" folder.\n",
        ])
        print(missingFileMessage)
        s.exit(0)
    print("-- End of parsing\n")

    # Temporary file "sampleidlist": written, read through sed, removed.
    sb.call("ls ./meta/match > sampleidlist", shell=True)
    identifiers = sb.check_output(
        "sed 's/.match//g' sampleidlist | sed 's/testfiles//g' | sed '/^$/d'",
        shell=True).split()
    sb.call("rm -f sampleidlist", shell=True)

    if not sb.check_output("ls ./meta/match/testfiles", shell=True):
        # Nothing listed yet, so the match files still need pre-processing.
        print("/!\ Pre-processing files for parsing...")
        process(identifiers)
        print("/!\ Pre-processing done.")

    print("/!\ Constructing the features vectors...")
    sampleList = mergeList(identifiers, firstColumn)
    try:
        # Four values come back; the last two are not needed here.
        matchingNodes, idSequences, _, _ = featuresCreate(
            sampleList, fastaFileName)
    except ValueError:
        print("/!\ ERROR: Please look at the line above.")
        print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
        s.exit(0)
    print("-- End of construction\n")

    # Positional bundle handed to every action.
    dataArray = [
        samplesInfoList, infoList, sampleList, idSequences, matchingNodes
    ]

    leaveWords = ("exit", "exit()", "quit")
    answer = ""
    while answer not in leaveWords:
        try:
            print("What do you want to do?")
            print("[Write down the number matching with the action required. Details are in README file]")
            print("   1: User node selection")
            print("   2: Random sub-sampling")
            print("[To quit, write down exit]")
            answer = raw_input("Your answer?\n")
            if answer in leaveWords:
                # Nothing to run; the loop condition stops the session.
                continue
            if answer not in ("1", "2"):
                print("\n/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit.")
                raise ValueError
            if answer == "1":
                userNodeSelectionAct(dataArray)
            else:
                randomSubSamplingAct(dataArray)
            print("-- End \n")
        except ValueError:
            # Reached for an unknown choice and for a ValueError raised by an action.
            print("/!\ ERROR: Please look at the line above.")
            print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
예제 #5
0
파일: main.py 프로젝트: cbib/taxotree
def main():
    """Parse the three input files, build the taxonomic tree and serve the menu.

    The user is asked for the taxonomic tree, occurrence matrix and data
    matrix file names; an empty answer selects the default name. Each file
    is parsed in turn (data matrix, occurrence matrix, tree). An IOError
    during parsing is reported and followed by s.exit(0). Once the tree is
    built, a nine-entry menu is shown until the user types "exit", "exit()"
    or "quit". Entry 6 appends the value returned by similarityAct to
    dataArray each time it is chosen. A ValueError raised while handling a
    menu choice is reported and the menu is shown again.
    """
    # Single-argument print(...) is the same statement under Python 2.
    # An empty answer falls back to the default file name.
    treeName = raw_input("Write down the file name of the taxonomic tree in the folder \"meta\" [ without the extension .tree ]\n") or "GGdb2015"
    occurrenceName = raw_input("Write down the CSV file name of the occurrence matrix in the folder \"meta\" [ without the extension .csv ]\n") or "MGAcount_complete"
    infoName = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n") or "Info"

    print("/!\ Data getting parsed...")
    try:
        samplesInfoList, infoList = parseInfo(infoName)
        sampleIDList = getSampleIDList(samplesInfoList)
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", infoName, ".csv does not exist in \"meta\" folder\n"]))
        s.exit(0)
    print("...")
    try:
        samplesOccList, speciesList = parseMatrix(occurrenceName)
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", occurrenceName, ".csv does not exist in \"meta\" folder\n"]))
        s.exit(0)
    print("...")
    try:
        paths, n, nodesList = parseTree(treeName)
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", treeName, ".tree does not exist in \"meta\" folder\n"]))
        s.exit(0)
    print("-- End of parsing\n")

    print("/!\ Constructing the whole annotated taxonomic tree")
    print("[ You may have to wait for a few seconds... ]")
    taxoTree = TaxoTree("Root").addNode(paths, nodesList, samplesOccList)
    print("-- End of construction\n")

    # Positional bundle handed to every action.
    dataArray = [
        samplesInfoList, infoList, samplesOccList, speciesList,
        paths, n, nodesList, taxoTree, sampleIDList,
    ]

    quitWords = ("exit", "exit()", "quit")
    menuLines = (
        "What do you want to do?",
        "[Write down the number matching with the action required. Details are in README file]",
        "   1: Total ratio and Diff ratio",
        "   2: Pattern ratio",
        "   3: Percentage of assignments in a certain group of bacterias depending on metadata",
        "   4: Pearson correlation coefficient",
        "   5: Microbial diversity in samples",
        "   6: Similarity coefficients between patients",
        "   7: Print the taxonomic tree",
        "   8: Plot a graph, or a pie chart",
        "   9: Compute total distance between two samples",
        "[To quit, write down exit]",
    )
    answer = ""
    while answer not in quitWords:
        try:
            for menuLine in menuLines:
                print(menuLine)
            answer = raw_input("Your answer?\n")
            if answer in quitWords:
                # The loop condition ends the session.
                continue
            if answer == "1":
                totalDiffRatioAct(dataArray)
            elif answer == "2":
                patternRatioAct(dataArray)
            elif answer == "3":
                percentageAct(dataArray)
            elif answer == "4":
                pearsonAct(dataArray)
            elif answer == "5":
                diversityAct(dataArray)
            elif answer == "6":
                # The similarity result is kept at the end of dataArray.
                similarityResult = similarityAct(dataArray, infoName)
                dataArray.append(similarityResult)
            elif answer == "7":
                printTreeAct(dataArray)
            elif answer == "8":
                plottingAct(dataArray)
            elif answer == "9":
                distanceAct(dataArray)
            else:
                print("\n/!\ ERROR: Please enter a number between 1 and 9 included, or 'exit' if you want to quit.")
                raise ValueError
            # Entry 5 has always printed its end marker without the space.
            if answer == "5":
                print("-- End\n")
            else:
                print("-- End \n")
        except ValueError:
            # Reached for an unknown choice and for a ValueError raised by an action.
            print("/!\ ERROR: Please look at the line above.")
            print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
예제 #6
0
def main():
    """Build the features and the tree, load the distance matrices, serve the menu.

    Steps, in order:
      1. Ask for the data matrix and FASTA file names (an empty answer
         selects the default) and parse the data matrix.
      2. Read the sample identifiers from the listing of ./meta/match and
         pre-process the match files when the listing of
         ./meta/match/testfiles is empty.
      3. Create the feature vectors and build the taxonomic tree.
      4. When ./meta holds no ".dist" file, compute both distance matrices.
      5. Ask whether to compute the consensus matrix again or to import,
         then ask for q until a listed value in [0, 1] is given, and
         append both imported dictionaries to dataArray.
      6. Show a two-entry menu until the user types "exit", "exit()",
         "quit" or "quit()".

    An IOError while parsing the data matrix, or a ValueError while
    creating the features, is reported and followed by s.exit(0).
    """
    # Single-argument print(...) is the same statement under Python 2.
    iMatrix = raw_input("Write down the CSV file name of the data matrix in the folder \"meta\" [ without the extension .csv ]\n")
    if (iMatrix == ""):
        iMatrix = "Info"
    fastaFileName = raw_input("Write down the FASTA file name in the folder \"meta\" [ without the extension .fasta ]\n")
    if (fastaFileName == ""):
        fastaFileName = "GREENGENES_gg16S_unaligned_10022015"
    print("/!\ Data getting parsed...")
    try:
        samplesInfoList,infoList = parseInfo(iMatrix)
        filenames = [sample[0] for sample in samplesInfoList]
    except IOError:
        print(" ".join(["\nERROR: Maybe the filename", iMatrix, ".csv does not exist in \"meta\" folder.\n"]))
        s.exit(0)
    print("-- End of parsing\n")
    # The listing goes through a temporary file named "sampleidlist",
    # which is removed once it has been read.
    sb.call("ls ./meta/match > sampleidlist",shell=True)
    sampleidlist = sb.check_output("sed 's/.match//g' sampleidlist | sed 's/testfiles//g' | sed '/^$/d'",shell=True).split()
    sb.call("rm -f sampleidlist",shell=True)
    result = sb.check_output("ls ./meta/match/testfiles",shell=True)
    if not result:
        print("/!\ Pre-processing files for parsing...")
        process(sampleidlist)
        print("/!\ Pre-processing done.")
    print("/!\ Constructing the features vectors...")
    sampleList = mergeList(sampleidlist,filenames)
    try:
        matchingNodes,idSequences,paths,nodesListTree = featuresCreate(sampleList,fastaFileName)
    except ValueError:
        print("/!\ ERROR: Please look at the line above.")
        print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")
        s.exit(0)
    print("-- End of construction")
    print("/!\ Constructing the whole taxonomic tree...")
    print("[ You may have to wait a few seconds... ]")
    taxoTree = TaxoTree("Root").addNode(paths,nodesListTree)
    print("-- End of construction\n")
    # Positional bundle handed to every action; the two imported distance
    # dictionaries are appended to it further down.
    dataArray = [samplesInfoList,infoList,idSequences,sampleList,matchingNodes,paths,nodesListTree,taxoTree]
    filesList = sb.check_output("ls ./meta | awk '/.dist/'",shell=True).split()
    if not filesList:
        # No ".dist" file yet: compute both matrices once.
        print("/!\ Computing distance matrix... (1/2)")
        _timedDistanceMatrix(distMatched,dataArray)
        print("/!\ Computing distance matrix... (2/2)")
        _timedDistanceMatrix(distConsensus,dataArray)
        print("-- End of computation.")
    quitWords = ("exit","exit()","quit","quit()")
    answer = None
    done = False
    # NOTE(review): a quit word typed at the compute/import prompt leaves
    # this loop without importing anything and the menu below is still
    # shown -- confirm whether the program should stop instead.
    while not done and answer not in quitWords:
        try:
            answer = raw_input("Do you want to compute distance matrix for another value of q or import pre-computed matrices? compute/import\n")
            if answer == "compute":
                print("/!\ Computing distance matrix...")
                _timedDistanceMatrix(distConsensus,dataArray)
                print("-- End of computation.")
            elif not (answer == "import"):
                print("\n/!\ You should answer by 'compute' or 'import'!")
                raise ValueError
            # Both answers end with an import of the matrices.
            undone = True
            while undone:
                qList = sorted(sb.check_output("ls ./meta | awk '/.dist/' | sed 's/matrix[1-2]//g' | sed 's/.dist//g'",shell=True).split())
                print(" ".join(["List of pre-computed q:", str(qList)]))
                q = raw_input("Choose q among the ones above.\n")
                try:
                    qValue = float(q)
                except ValueError:
                    # Non-numeric input is treated as a wrong value and
                    # asked again; it used to end the whole step without
                    # importing any matrix.
                    qValue = None
                if qValue is None or qValue < 0 or qValue > 1 or not (q in qList):
                    print(" ".join(["\n/!\ ERROR: Wrong value of q [should be between 0 and 1, and be already computed]:", q, ".\n"]))
                    continue
                distMatchedDict = importMatrixToDict("matrix1",dataArray)
                distConsensusDict = importMatrixToDict("matrix2" + q,dataArray)
                undone = False
            dataArray.append(distMatchedDict)
            dataArray.append(distConsensusDict)
            print("/!\ Matrices imported.")
            # Only a completed import ends this step; a ValueError raised
            # above asks the compute/import question again.
            done = True
        except ValueError:
            continue
    answer = ""
    while answer not in quitWords:
        try:
            print("What do you want to do?")
            print("[Write down the number matching with the action required. Details are in README file]")
            print("   1: Clustering")
            print("   2: Print the taxonomic tree")
            print("[To quit, write down exit]")
            answer = raw_input("Your answer?\n")
            if (answer =="1"):
                clusteringAct(dataArray)
                print("-- End \n")
            elif (answer == "2"):
                printTreeAct(dataArray)
                print("-- End \n")
            elif answer not in quitWords:
                # Same quit words as the loop condition, so "quit()" no
                # longer prints an error before leaving.
                print("\n/!\ ERROR: Please enter a number between 1 and 2 included, or 'exit' if you want to quit.")
                raise ValueError
        except ValueError:
            # Reached for an unknown choice and for a ValueError raised by an action.
            print("/!\ ERROR: Please look at the line above.")
            print("/!\ ERROR: If the line above is blank, it may be an uncatched ValueError.\n")


def _timedDistanceMatrix(distFunction,dataArray):
    """Call computeDistanceMatrix(distFunction, dataArray) and print the elapsed time."""
    start = time()
    computeDistanceMatrix(distFunction,dataArray)
    end = time()
    print(" ".join(["TIME:", str(end-start), "sec"]))