Ejemplo n.º 1
0
def runMethodC(blastOutputFilePath, fastaFilePath, tmpDir, scoring, onto,
               configMap, debug):
    """Run the Method C wrapper script and return its filtered predictions.

    A single shell command line is built that creates ``<tmpDir>/methodC``,
    changes into ``configMap["GROUP_C_PATH"]`` and runs ``./CafaWrapper3.pl``.
    The command, its exit status and its combined output are appended to
    ``<tmpDir>/logC.txt``. A non-zero status is reported on stderr but does
    not abort the function; the output file is parsed regardless.

    Args:
        blastOutputFilePath: First argument handed to the wrapper script.
        fastaFilePath: Not used by this function.
        tmpDir: Existing directory that receives the log file and the
            ``methodC`` working directory.
        scoring: Third argument handed to the wrapper script, verbatim.
        onto: Inserted into the output file name ``output.<onto>.txt``.
        configMap: Mapping; only the ``"GROUP_C_PATH"`` entry is read.
        debug: When truthy, the command line is echoed to stderr.

    Returns:
        A list of tab-separated ``targetId, goTerm, score`` strings. Target
        ids are cut to 63 characters, scores are clamped to [0, 1] and
        formatted with two decimals. Only the first occurrence of each
        (targetId, goTerm) pair is kept, and entries with an empty GO term or
        a score that rounds to 0.00 are dropped.

    Raises:
        IOError: If the log file or the output file cannot be opened.
        ValueError: If a data line does not consist of exactly three
            space-separated fields or its score is not a number.
    """
    p("Running Method C")

    tmpDirPath = os.path.join(tmpDir, "methodC")
    outputFilePath = os.path.join(tmpDirPath, "output.%s.txt" % (onto))
    # NOTE(review): the arguments are interpolated into a shell command line
    # without quoting, so paths containing spaces or shell metacharacters
    # will break it.
    commandString = ";".join([
        "mkdir -p " + tmpDirPath,
        "cd " + configMap["GROUP_C_PATH"],
        "./CafaWrapper3.pl %s %s %s %s" %
        (blastOutputFilePath, outputFilePath, scoring, tmpDirPath),
    ])
    if debug:
        sys.stderr.write(commandString + "\n")

    # Mode 'a' creates the file when it is missing, so no existence check is
    # needed; the with-block closes the log even if the command call raises.
    with open(os.path.join(tmpDir, "logC.txt"), 'a') as logFile:
        s, o = commands.getstatusoutput(commandString)
        # Newline-terminated entries keep successive runs readable.
        logFile.write("Command: " + commandString + "\n")
        logFile.write(str(s) + "\n")
        logFile.write(o + "\n")

    if s != 0:
        # Best effort: report the failure and still try to read the output.
        sys.stderr.write("!!!Error!!! " + commandString + "\n")
        sys.stderr.write(str(s) + "\n")
        sys.stderr.write(o + "\n")

    headerPrefixes = ("AUTHOR", "MODEL", "ACCURACY", "KEYWORDS", "END")
    predFilesContent = []
    preds = set()
    with open(outputFilePath) as f:
        for line in f:
            line = line.rstrip()
            if line == "" or line.startswith(headerPrefixes):
                continue
            targetId, goTerm, rel = line.split(" ")
            targetId = targetId[:63]
            rel = "%.2f" % max(min(float(rel), 1.00), 0.00)
            key = targetId + "\t" + goTerm
            # The score test uses the rounded string on purpose: anything
            # that prints as 0.00 is discarded.
            if key not in preds and goTerm.strip() != "" and float(rel) > 0.0:
                predFilesContent.append(key + "\t" + rel)
                preds.add(key)

    return predFilesContent
Ejemplo n.º 2
0
def runMethodB(blastOutputFilePath, fastaFilePath, tmpDir, GROUP_B_K, onto,
               configMap):
    """Run the Method B ``knn_weighted`` tool and return its predictions.

    Ensures ``<tmpDir>/methodB/<onto>`` exists, then runs ``./knn_weighted``
    from ``configMap["GROUP_B_PATH"]`` through the shell. The command, its
    exit status and its combined output are appended to
    ``<tmpDir>/logB.txt``. A non-zero status is reported on stderr but does
    not abort the function; the result file is parsed regardless.

    Args:
        blastOutputFilePath: Passed to the tool as both ``-j`` and ``-d``.
        fastaFilePath: Passed to the tool as ``-i``; its base name also
            determines the name of the result file that is read back.
        tmpDir: Directory that receives the log file and the ``methodB``
            working directory.
        GROUP_B_K: Passed to the tool as ``-k``.
        onto: Name of the per-run output sub-directory.
        configMap: Mapping; the ``"JAR_INSTALL_FOLDER_PATH"`` and
            ``"GROUP_B_PATH"`` entries are read.

    Returns:
        A list of tab-separated lines taken from
        ``<basename(fastaFilePath)>.weighted_knn.predicted_leaves``. The
        first field (the id) is cut to 63 characters, a third field greater
        than 1.0 is rewritten as ``1.00``, and at most 1000 lines are kept
        per id.

    Raises:
        OSError: If the output directory cannot be created.
        IOError: If the log file or the result file cannot be opened.
        IndexError: If a data line has fewer than three tab-separated fields.
        ValueError: If the third field of a data line is not a number.
    """
    p("Running Method B")

    tmpDirPath = os.path.join(tmpDir, "methodB")
    outputDirPath = os.path.join(tmpDirPath, onto)
    # Create both directory levels, tolerating a directory left behind by an
    # earlier run with the same tmpDir/onto instead of failing on it.
    try:
        os.makedirs(outputDirPath)
    except OSError:
        if not os.path.isdir(outputDirPath):
            raise

    jarPath = configMap["JAR_INSTALL_FOLDER_PATH"].rstrip("/")
    # NOTE(review): -j and -d both receive blastOutputFilePath; confirm that
    # this is what the tool expects.
    # NOTE(review): the arguments are interpolated into a shell command line
    # without quoting, so paths containing spaces will break it.
    commandString = ";".join([
        "mkdir -p " + tmpDirPath,
        "cd " + configMap["GROUP_B_PATH"],
        "./knn_weighted -m weighted_knn -j %s -d %s -i %s -o %s -k %s -l %s" %
        (blastOutputFilePath, blastOutputFilePath, fastaFilePath,
         outputDirPath, GROUP_B_K, jarPath),
    ])

    # Mode 'a' creates the file when it is missing; the with-block closes the
    # log even if the command call raises.
    with open(os.path.join(tmpDir, "logB.txt"), 'a') as logFile:
        s, o = commands.getstatusoutput(commandString)
        logFile.write("Command: " + commandString + "\n")
        logFile.write(str(s) + "\n")
        logFile.write(o + "\n")

    if s != 0:
        # Best effort: report the failure and still try to read the output.
        sys.stderr.write("!!!Error!!! " + commandString + "\n")
        sys.stderr.write(str(s) + "\n")
        sys.stderr.write(o + "\n")

    predictionsPath = os.path.join(
        outputDirPath,
        os.path.basename(fastaFilePath) + ".weighted_knn.predicted_leaves")

    headerPrefixes = ("AUTHOR", "MODEL", "ACCURACY", "KEYWORDS", "END")
    maxLinesPerId = 1000
    idToGoTermCount = {}
    predFilesContent = []
    with open(predictionsPath) as f:
        for line in f:
            line = line.rstrip()
            # Blank lines carry no prediction; skip them with the headers.
            if not line or line.startswith(headerPrefixes):
                continue
            fields = line.split("\t")
            currId = fields[0][:63]
            keptSoFar = idToGoTermCount.get(currId, 0)
            if keptSoFar >= maxLinesPerId:
                continue
            # Cap any score above 1.0, not only the literal "1.01", and touch
            # nothing but the score field.
            if float(fields[2]) > 1.0:
                fields[2] = "1.00"
            idToGoTermCount[currId] = keptSoFar + 1
            predFilesContent.append("\t".join([currId] + fields[1:]))
    return predFilesContent
Ejemplo n.º 3
0
def runMethodA(blastOutputFilePath, fastaFilePath, tmpDir, GROUP_A_THRESHOLD,
               GROUP_A_K, iters, onto, configMap):
    """Run the Method A Java tool and return its de-duplicated predictions.

    A single shell command line is built that creates ``<tmpDir>/methodA``,
    changes into ``configMap["GROUP_A_PATH"]`` and runs the
    ``GOSSIPSTarter`` class from ``gossip.jar``. The command, its exit status
    and its combined output are appended to ``<tmpDir>/logA.txt``. A non-zero
    status is reported on stderr but does not abort the function; the result
    file is parsed regardless.

    Args:
        blastOutputFilePath: Second program argument.
        fastaFilePath: First program argument.
        tmpDir: Existing directory that receives the log file and the
            ``methodA`` working directory.
        GROUP_A_THRESHOLD: Fourth program argument.
        GROUP_A_K: Fifth program argument.
        iters: Sixth program argument.
        onto: Inserted into the output file name ``output.<onto>.cafa.txt``.
        configMap: Mapping; the ``"JAR_INSTALL_FOLDER_PATH"`` and
            ``"GROUP_A_PATH"`` entries are read.

    Returns:
        A list of tab-separated ``targetId, goTerm, score`` strings read from
        ``output.<onto>.cafa.txt.cafa``. The target id is the raw first field
        without its first character, cut at the first ``(`` and limited to 63
        characters; scores are clamped to [0, 1] and formatted with two
        decimals. Only the first occurrence of each (targetId, goTerm) pair
        is kept and entries with an empty GO term are dropped.

    Raises:
        IOError: If the log file or the result file cannot be opened.
        ValueError: If a data line does not consist of exactly three
            tab-separated fields or its score is not a number.
    """
    p("Running Method A")

    tmpDirPath = os.path.join(tmpDir, "methodA")
    jarPath = os.path.join(configMap["JAR_INSTALL_FOLDER_PATH"], "gossip.jar")
    outputFilePath = os.path.join(tmpDirPath, "output.%s.cafa.txt" % (onto))
    # NOTE(review): the arguments are interpolated into a shell command line
    # without quoting, so paths containing spaces will break it.
    commandString = ";".join([
        "mkdir -p " + tmpDirPath,
        "cd " + configMap["GROUP_A_PATH"],
        "java -cp %s GOSSIPSTarter %s %s %s %s %s %s" %
        (jarPath, fastaFilePath, blastOutputFilePath, outputFilePath,
         GROUP_A_THRESHOLD, GROUP_A_K, iters),
    ])

    # Mode 'a' creates the file when it is missing; the with-block closes the
    # log even if the command call raises.
    with open(os.path.join(tmpDir, "logA.txt"), 'a') as logFile:
        s, o = commands.getstatusoutput(commandString)
        logFile.write("Command: " + commandString + "\n")
        logFile.write(str(s) + "\n")
        logFile.write(o + "\n")

    if s != 0:
        # Best effort: report the failure and still try to read the output.
        sys.stderr.write("!!!Error!!! " + commandString + "\n")
        sys.stderr.write(str(s) + "\n")
        sys.stderr.write(o + "\n")

    headerPrefixes = ("AUTHOR", "MODEL", "ACCURACY", "KEYWORDS", "END")
    preds = set()
    predFilesContent = []
    # The predictions are read from the requested output path plus ".cafa".
    with open(outputFilePath + ".cafa") as outputFile:
        for line in outputFile:
            line = line.rstrip()
            # Blank lines carry no prediction; skip them with the headers.
            if not line or line.startswith(headerPrefixes):
                continue
            targetId, goTerm, rel = line.split("\t")
            targetId = targetId[1:].split("(")[0][:63]
            rel = "%.2f" % max(min(float(rel), 1.00), 0.00)
            key = targetId + "\t" + goTerm
            if key not in preds and goTerm.strip() != "":
                predFilesContent.append(key + "\t" + rel)
                preds.add(key)

    return predFilesContent
Ejemplo n.º 4
0
def runMethodC(blastOutputFilePath, fastaFilePath, tmpDir, scoring, onto,
               configMap, debug):
    """Run the Method C wrapper script and return its filtered predictions.

    A single shell command line is built that creates ``<tmpDir>/methodC``,
    changes into ``configMap["GROUP_C_PATH"]`` and runs ``./CafaWrapper3.pl``.
    The command, its exit status and its combined output are appended to
    ``<tmpDir>/logC.txt``. A non-zero status is reported on stderr but does
    not abort the function; the output file is parsed regardless.

    Args:
        blastOutputFilePath: First argument handed to the wrapper script.
        fastaFilePath: Not used by this function.
        tmpDir: Existing directory that receives the log file and the
            ``methodC`` working directory.
        scoring: Third argument handed to the wrapper script, verbatim.
        onto: Inserted into the output file name ``output.<onto>.txt``.
        configMap: Mapping; only the ``"GROUP_C_PATH"`` entry is read.
        debug: When truthy, the command line is echoed to stderr.

    Returns:
        A list of tab-separated ``targetId, goTerm, score`` strings. Target
        ids are cut to 63 characters, scores are clamped to [0, 1] and
        formatted with two decimals. Only the first occurrence of each
        (targetId, goTerm) pair is kept, and entries with an empty GO term or
        a score that rounds to 0.00 are dropped.

    Raises:
        IOError: If the log file or the output file cannot be opened.
        ValueError: If a data line does not consist of exactly three
            space-separated fields or its score is not a number.
    """
    p("Running Method C")

    tmpDirPath = os.path.join(tmpDir, "methodC")
    outputFilePath = os.path.join(tmpDirPath, "output.%s.txt" % (onto))
    # NOTE(review): the arguments are interpolated into a shell command line
    # without quoting, so paths containing spaces or shell metacharacters
    # will break it.
    commandString = ";".join([
        "mkdir -p " + tmpDirPath,
        "cd " + configMap["GROUP_C_PATH"],
        "./CafaWrapper3.pl %s %s %s %s" %
        (blastOutputFilePath, outputFilePath, scoring, tmpDirPath),
    ])
    if debug:
        sys.stderr.write(commandString + "\n")

    # Mode 'a' creates the file when it is missing, so no existence check is
    # needed; the with-block closes the log even if the command call raises.
    with open(os.path.join(tmpDir, "logC.txt"), 'a') as logFile:
        s, o = commands.getstatusoutput(commandString)
        # Newline-terminated entries keep successive runs readable.
        logFile.write("Command: " + commandString + "\n")
        logFile.write(str(s) + "\n")
        logFile.write(o + "\n")

    if s != 0:
        # Best effort: report the failure and still try to read the output.
        sys.stderr.write("!!!Error!!! " + commandString + "\n")
        sys.stderr.write(str(s) + "\n")
        sys.stderr.write(o + "\n")

    headerPrefixes = ("AUTHOR", "MODEL", "ACCURACY", "KEYWORDS", "END")
    predFilesContent = []
    preds = set()
    with open(outputFilePath) as f:
        for line in f:
            line = line.rstrip()
            if line == "" or line.startswith(headerPrefixes):
                continue
            targetId, goTerm, rel = line.split(" ")
            targetId = targetId[:63]
            rel = "%.2f" % max(min(float(rel), 1.00), 0.00)
            key = targetId + "\t" + goTerm
            # The score test uses the rounded string on purpose: anything
            # that prints as 0.00 is discarded.
            if key not in preds and goTerm.strip() != "" and float(rel) > 0.0:
                predFilesContent.append(key + "\t" + rel)
                preds.add(key)

    return predFilesContent
Ejemplo n.º 5
0
def runBlast(inputFilePath, blastDatabasePath, outputFilePath, tmpDir, eValue,
             iters, configMap):
    """Run BLAST on every split of a FASTA file and merge the results.

    The input is divided by ``splitBigFastaFile``; for each split a command
    is built with ``createBlastPGPCommand`` and executed synchronously, one
    split after another, writing to ``<split>.blast``. Afterwards all
    ``.blast`` files are concatenated into ``outputFilePath``, each one
    preceded by a newline.

    Args:
        inputFilePath: FASTA file handed to ``splitBigFastaFile``.
        blastDatabasePath: Forwarded to ``setBlastDatabasePath``.
        outputFilePath: File that receives the merged output (overwritten).
        tmpDir: Forwarded to ``splitBigFastaFile``.
        eValue: Forwarded to ``setEValue``.
        iters: Forwarded to ``setJ``.
        configMap: Mapping; the ``"FASTA_SPLIT_SIZE"`` and
            ``"BLASTPGP_EXE_PATH"`` entries are read.

    Raises:
        Exception: If any BLAST command exits with a non-zero status; the
            command, status and output are passed to ``Logger.log`` first.
        IOError: If the merged file or a per-split result cannot be opened.
    """
    fastaSplits = splitBigFastaFile(inputFilePath, tmpDir,
                                    configMap["FASTA_SPLIT_SIZE"])

    for fastaSplit in fastaSplits:
        blastParas = blastallParameters()
        blastParas.setBlastExePath(configMap["BLASTPGP_EXE_PATH"])
        blastParas.setBlastDatabasePath(blastDatabasePath)
        blastParas.setEValue(eValue)
        blastParas.setJ(iters)
        blastParas.setB(1000)
        blastParas.setV(1000)
        blastParas.setInputFilePath(fastaSplit)
        blastParas.setOutputFilePath(fastaSplit + ".blast")

        blastCommand = createBlastPGPCommand(blastParas)

        s, o = commands.getstatusoutput(blastCommand)
        if s != 0:
            Logger.log("!!!Error!!! " + blastCommand)
            Logger.log(s)
            Logger.log(o)
            # Same exception type as before, now carrying the failure context.
            raise Exception("BLAST command failed with status %s: %s" %
                            (s, blastCommand))

    p("Merging Blast Output")
    # The with-blocks close every file even if a read or write fails midway.
    with open(outputFilePath, 'w') as bigOutputFile:
        for fastaSplit in fastaSplits:
            with open(fastaSplit + ".blast") as blastFile:
                bigOutputFile.write("\n" + blastFile.read())
Ejemplo n.º 6
0
def runBlast(inputFilePath, blastDatabasePath, outputFilePath, tmpDir, eValue,
             iters, configMap):
    """Run BLAST on every split of a FASTA file and merge the results.

    The input is divided by ``splitBigFastaFile``; for each split a command
    is built with ``createBlastPGPCommand`` and executed synchronously, one
    split after another, writing to ``<split>.blast``. Afterwards all
    ``.blast`` files are concatenated into ``outputFilePath``, each one
    preceded by a newline.

    Args:
        inputFilePath: FASTA file handed to ``splitBigFastaFile``.
        blastDatabasePath: Forwarded to ``setBlastDatabasePath``.
        outputFilePath: File that receives the merged output (overwritten).
        tmpDir: Forwarded to ``splitBigFastaFile``.
        eValue: Forwarded to ``setEValue``.
        iters: Forwarded to ``setJ``.
        configMap: Mapping; the ``"FASTA_SPLIT_SIZE"`` and
            ``"BLASTPGP_EXE_PATH"`` entries are read.

    Raises:
        Exception: If any BLAST command exits with a non-zero status; the
            command, status and output are passed to ``Logger.log`` first.
        IOError: If the merged file or a per-split result cannot be opened.
    """
    fastaSplits = splitBigFastaFile(inputFilePath, tmpDir,
                                    configMap["FASTA_SPLIT_SIZE"])

    # Indentation is uniform here; the previous mix of a leading space with
    # tabs in the error-handling lines is rejected by Python 3.
    for fastaSplit in fastaSplits:
        blastParas = blastallParameters()
        blastParas.setBlastExePath(configMap["BLASTPGP_EXE_PATH"])
        blastParas.setBlastDatabasePath(blastDatabasePath)
        blastParas.setEValue(eValue)
        blastParas.setJ(iters)
        blastParas.setB(1000)
        blastParas.setV(1000)
        blastParas.setInputFilePath(fastaSplit)
        blastParas.setOutputFilePath(fastaSplit + ".blast")

        blastCommand = createBlastPGPCommand(blastParas)

        s, o = commands.getstatusoutput(blastCommand)
        if s != 0:
            Logger.log("!!!Error!!! " + blastCommand)
            Logger.log(s)
            Logger.log(o)
            # Same exception type as before, now carrying the failure context.
            raise Exception("BLAST command failed with status %s: %s" %
                            (s, blastCommand))

    p("Merging Blast Output")
    # The with-blocks close every file even if a read or write fails midway.
    with open(outputFilePath, 'w') as bigOutputFile:
        for fastaSplit in fastaSplits:
            with open(fastaSplit + ".blast") as blastFile:
                bigOutputFile.write("\n" + blastFile.read())
Ejemplo n.º 7
0
def runMethodA(blastOutputFilePath, fastaFilePath, tmpDir, GROUP_A_THRESHOLD,
               GROUP_A_K, iters, onto, configMap):
    """Run the Method A Java tool and return its de-duplicated predictions.

    A single shell command line is built that creates ``<tmpDir>/methodA``,
    changes into ``configMap["GROUP_A_PATH"]`` and runs the
    ``GOSSIPSTarter`` class from ``gossip.jar``. The command, its exit status
    and its combined output are appended to ``<tmpDir>/logA.txt``. A non-zero
    status is reported on stderr but does not abort the function; the result
    file is parsed regardless.

    Args:
        blastOutputFilePath: Second program argument.
        fastaFilePath: First program argument.
        tmpDir: Existing directory that receives the log file and the
            ``methodA`` working directory.
        GROUP_A_THRESHOLD: Fourth program argument.
        GROUP_A_K: Fifth program argument.
        iters: Sixth program argument.
        onto: Inserted into the output file name ``output.<onto>.cafa.txt``.
        configMap: Mapping; the ``"JAR_INSTALL_FOLDER_PATH"`` and
            ``"GROUP_A_PATH"`` entries are read.

    Returns:
        A list of tab-separated ``targetId, goTerm, score`` strings read from
        ``output.<onto>.cafa.txt.cafa``. The target id is the raw first field
        without its first character, cut at the first ``(`` and limited to 63
        characters; scores are clamped to [0, 1] and formatted with two
        decimals. Only the first occurrence of each (targetId, goTerm) pair
        is kept and entries with an empty GO term are dropped.

    Raises:
        IOError: If the log file or the result file cannot be opened.
        ValueError: If a data line does not consist of exactly three
            tab-separated fields or its score is not a number.
    """
    p("Running Method A")

    tmpDirPath = os.path.join(tmpDir, "methodA")
    jarPath = os.path.join(configMap["JAR_INSTALL_FOLDER_PATH"], "gossip.jar")
    outputFilePath = os.path.join(tmpDirPath, "output.%s.cafa.txt" % (onto))
    # NOTE(review): the arguments are interpolated into a shell command line
    # without quoting, so paths containing spaces will break it.
    commandString = ";".join([
        "mkdir -p " + tmpDirPath,
        "cd " + configMap["GROUP_A_PATH"],
        "java -cp %s GOSSIPSTarter %s %s %s %s %s %s" %
        (jarPath, fastaFilePath, blastOutputFilePath, outputFilePath,
         GROUP_A_THRESHOLD, GROUP_A_K, iters),
    ])

    # Mode 'a' creates the file when it is missing; the with-block closes the
    # log even if the command call raises.
    with open(os.path.join(tmpDir, "logA.txt"), 'a') as logFile:
        s, o = commands.getstatusoutput(commandString)
        logFile.write("Command: " + commandString + "\n")
        logFile.write(str(s) + "\n")
        logFile.write(o + "\n")

    if s != 0:
        # Best effort: report the failure and still try to read the output.
        sys.stderr.write("!!!Error!!! " + commandString + "\n")
        sys.stderr.write(str(s) + "\n")
        sys.stderr.write(o + "\n")

    headerPrefixes = ("AUTHOR", "MODEL", "ACCURACY", "KEYWORDS", "END")
    preds = set()
    predFilesContent = []
    # The predictions are read from the requested output path plus ".cafa".
    with open(outputFilePath + ".cafa") as outputFile:
        for line in outputFile:
            line = line.rstrip()
            # Blank lines carry no prediction; skip them with the headers.
            if not line or line.startswith(headerPrefixes):
                continue
            targetId, goTerm, rel = line.split("\t")
            targetId = targetId[1:].split("(")[0][:63]
            rel = "%.2f" % max(min(float(rel), 1.00), 0.00)
            key = targetId + "\t" + goTerm
            if key not in preds and goTerm.strip() != "":
                predFilesContent.append(key + "\t" + rel)
                preds.add(key)

    return predFilesContent
Ejemplo n.º 8
0
def runMethodB(blastOutputFilePath, fastaFilePath, tmpDir, GROUP_B_K, onto,
               configMap):
    """Run the Method B ``knn_weighted`` tool and return its predictions.

    Ensures ``<tmpDir>/methodB/<onto>`` exists, then runs ``./knn_weighted``
    from ``configMap["GROUP_B_PATH"]`` through the shell. The command, its
    exit status and its combined output are appended to
    ``<tmpDir>/logB.txt``. A non-zero status is reported on stderr but does
    not abort the function; the result file is parsed regardless.

    Args:
        blastOutputFilePath: Passed to the tool as both ``-j`` and ``-d``.
        fastaFilePath: Passed to the tool as ``-i``; its base name also
            determines the name of the result file that is read back.
        tmpDir: Directory that receives the log file and the ``methodB``
            working directory.
        GROUP_B_K: Passed to the tool as ``-k``.
        onto: Name of the per-run output sub-directory.
        configMap: Mapping; the ``"JAR_INSTALL_FOLDER_PATH"`` and
            ``"GROUP_B_PATH"`` entries are read.

    Returns:
        A list of tab-separated lines taken from
        ``<basename(fastaFilePath)>.weighted_knn.predicted_leaves``. The
        first field (the id) is cut to 63 characters, a third field greater
        than 1.0 is rewritten as ``1.00``, and at most 1000 lines are kept
        per id.

    Raises:
        OSError: If the output directory cannot be created.
        IOError: If the log file or the result file cannot be opened.
        IndexError: If a data line has fewer than three tab-separated fields.
        ValueError: If the third field of a data line is not a number.
    """
    p("Running Method B")

    tmpDirPath = os.path.join(tmpDir, "methodB")
    outputDirPath = os.path.join(tmpDirPath, onto)
    # Create both directory levels, tolerating a directory left behind by an
    # earlier run with the same tmpDir/onto instead of failing on it.
    try:
        os.makedirs(outputDirPath)
    except OSError:
        if not os.path.isdir(outputDirPath):
            raise

    jarPath = configMap["JAR_INSTALL_FOLDER_PATH"].rstrip("/")
    # NOTE(review): -j and -d both receive blastOutputFilePath; confirm that
    # this is what the tool expects.
    # NOTE(review): the arguments are interpolated into a shell command line
    # without quoting, so paths containing spaces will break it.
    commandString = ";".join([
        "mkdir -p " + tmpDirPath,
        "cd " + configMap["GROUP_B_PATH"],
        "./knn_weighted -m weighted_knn -j %s -d %s -i %s -o %s -k %s -l %s" %
        (blastOutputFilePath, blastOutputFilePath, fastaFilePath,
         outputDirPath, GROUP_B_K, jarPath),
    ])

    # Mode 'a' creates the file when it is missing; the with-block closes the
    # log even if the command call raises.
    with open(os.path.join(tmpDir, "logB.txt"), 'a') as logFile:
        s, o = commands.getstatusoutput(commandString)
        logFile.write("Command: " + commandString + "\n")
        logFile.write(str(s) + "\n")
        logFile.write(o + "\n")

    if s != 0:
        # Best effort: report the failure and still try to read the output.
        sys.stderr.write("!!!Error!!! " + commandString + "\n")
        sys.stderr.write(str(s) + "\n")
        sys.stderr.write(o + "\n")

    predictionsPath = os.path.join(
        outputDirPath,
        os.path.basename(fastaFilePath) + ".weighted_knn.predicted_leaves")

    headerPrefixes = ("AUTHOR", "MODEL", "ACCURACY", "KEYWORDS", "END")
    maxLinesPerId = 1000
    idToGoTermCount = {}
    predFilesContent = []
    with open(predictionsPath) as f:
        for line in f:
            line = line.rstrip()
            # Blank lines carry no prediction; skip them with the headers.
            if not line or line.startswith(headerPrefixes):
                continue
            fields = line.split("\t")
            currId = fields[0][:63]
            keptSoFar = idToGoTermCount.get(currId, 0)
            if keptSoFar >= maxLinesPerId:
                continue
            # Cap any score above 1.0, not only the literal "1.01", and touch
            # nothing but the score field.
            if float(fields[2]) > 1.0:
                fields[2] = "1.00"
            idToGoTermCount[currId] = keptSoFar + 1
            predFilesContent.append("\t".join([currId] + fields[1:]))
    return predFilesContent