Esempio n. 1
0
def relEntropy(motif1, motif2):
    """Return the relative entropy of ``motif1`` with respect to ``motif2``.

    The result is the sum, over every row ``i`` of ``motif1`` and every
    column ``j`` in ``0..3``, of::

        motif1[i][j] * log(motif1[i][j] / motif2[i][j])

    using the natural logarithm.

    Cells are skipped (contribute 0) when:
      * ``motif1[i][j]`` is 0 -- the usual ``0 * log(0) = 0`` convention;
        without this guard ``math.log(0)`` raises ``ValueError``.
      * ``motif2[i][j]`` is 0 -- the division raises ``ZeroDivisionError``.

    Args:
        motif1: sequence of rows, each indexable at positions 0..3.
            (Presumably per-position base frequencies -- confirm with caller.)
        motif2: sequence indexed the same way, with at least as many rows.

    Returns:
        The accumulated sum as a float, or the sentinel ``-100.54`` if either
        matrix is too small for the indices above (``IndexError``).
    """
    output = 0.0
    try:
        for i in range(len(motif1)):
            for j in range(4):
                p = motif1[i][j]
                # Read the second matrix before the zero check so a shape
                # mismatch still surfaces as IndexError -> sentinel.
                q = motif2[i][j]
                if p == 0:
                    continue
                try:
                    ratio = p / q
                except ZeroDivisionError:
                    continue
                output += p * math.log(ratio)
    except IndexError:
        # Sentinel kept for callers that already test for it.
        output = -100.54
    return output

if __name__ == "__main__":
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("hello evaluation")

    # Score predicted motifs against the reference motifs.
    predMotifFiles = directory.getFiles('predictedmotif.txt')  # array of file names
    motifFiles = directory.getFiles('motif.txt')
    entropy_result = computeEntropy(motifFiles, predMotifFiles)

    # One "<x[0]>,<x[1]>" line per result; `with` closes the file even if a
    # write fails.
    with open('entropy_output.txt', 'w') as fo:
        for x in entropy_result:
            fo.write("" + x[0] + ',' + str(x[1]) + "\n")

    # Score predicted sites against the reference sites.
    sitesFiles = directory.getFiles('sites.txt')
    predSitesFiles = directory.getFiles('predictedsites.txt')
    site_result = siteOverlap(sitesFiles, predSitesFiles)

    # Same line format as above; previously this handle was never closed.
    with open('site_output.txt', 'w') as fo:
        for x in site_result:
            fo.write("" + x[0] + ',' + str(x[1]) + "\n")
Esempio n. 2
0
def chooseMotifPositions():
    """Draw one random start position per sequence.

    Produces ``globals.numberOfSequences`` integers, each drawn with
    ``random.randint`` from the inclusive range
    ``0 .. globals.lengthOfSequences - globals.motifLength``.

    Returns:
        list of int, in the order the positions were drawn.
    """
    return [
        random.randint(0, globals.lengthOfSequences - globals.motifLength)
        for _ in range(0, globals.numberOfSequences)
    ]
        
if __name__ == "__main__":
        print "\nRunning motif finder..."
        print "-----------------------" + '\n'

        startTime = datetime.datetime.now()
        print "start time = " + str(startTime) + '\n'

        # create array of motif length text files
        motifLengthFiles = directory.getFiles('motiflength.txt')
        sequencesFiles = directory.getFiles('sequences.fa')
        numberOfFiles = len(motifLengthFiles)

        # iterate over arrays 
        informationContent = 0
        for x in range(0, numberOfFiles):
                sequencesFile = sequencesFiles[x]
                motifLengthFile = motifLengthFiles[x]
                sequences = reader.readFastaFile(sequencesFile)
                motifLength = reader.readMotifLengthFile(motifLengthFile)
                output = findMotif(sequences, motifLength, int(sys.argv[1]))
                informationContent += output[2]
                writer.writePredictions(output, sequencesFile, motifLength)
                print "files written for " + str(sequencesFile)
                if (x + 1) % 10 == 0: