def relEntropy(motif1, motif2):
    """Return the relative entropy (KL divergence) of motif1 against motif2.

    Sums ``motif1[i][j] * log(motif1[i][j] / motif2[i][j])`` over every row
    ``i`` of ``motif1`` and over the first four columns ``j`` of each row
    (presumably one column per nucleotide -- confirm against the caller).

    Args:
        motif1: sequence of rows, each indexable at positions 0..3.
        motif2: sequence of rows with at least as many rows as ``motif1``,
            each indexable at positions 0..3.

    Returns:
        The accumulated sum as a float.  Terms whose ``motif1`` entry is zero
        contribute nothing (0 * log 0 is taken as 0), and terms whose
        ``motif2`` entry is zero are skipped.  If the two matrices do not
        line up (an ``IndexError`` occurs while indexing either one), the
        sentinel value ``-100.54`` is returned instead of a partial sum.
    """
    output = 0.0
    try:
        for i in range(len(motif1)):
            for j in range(4):
                p = motif1[i][j]
                q = motif2[i][j]
                if p == 0:
                    # math.log(0) raises ValueError; by convention a zero
                    # probability contributes nothing to the sum.
                    continue
                try:
                    output += p * math.log(p / q)
                except ZeroDivisionError:
                    # motif2 has a zero entry here: skip the term.
                    pass
    except IndexError:
        # Shape mismatch between the two motifs: report the sentinel.
        output = -100.54
    return output


if __name__ == "__main__":
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("hello evaluation")

    # Relative entropy between each known motif and its prediction.
    predMotifFiles = directory.getFiles('predictedmotif.txt')  # array of file names
    motifFiles = directory.getFiles('motif.txt')
    entropy_result = computeEntropy(motifFiles, predMotifFiles)
    # `with` guarantees the file is closed even if a write fails.
    with open('entropy_output.txt', 'w') as fo:
        for x in entropy_result:
            fo.write("" + x[0] + ',' + str(x[1]) + "\n")

    # Overlap between the known sites and the predicted sites.
    sitesFiles = directory.getFiles('sites.txt')
    predSitesFiles = directory.getFiles('predictedsites.txt')
    site_result = siteOverlap(sitesFiles, predSitesFiles)
    with open('site_output.txt', 'w') as fo:
        for x in site_result:
            fo.write("" + x[0] + ',' + str(x[1]) + "\n")
def chooseMotifPositions():
    """Draw one random position per sequence.

    Reads ``numberOfSequences``, ``lengthOfSequences`` and ``motifLength``
    from whatever is bound to the name ``globals`` in this file.  The name is
    used with attribute access, so it shadows the built-in ``globals()``
    function and must be bound elsewhere in the file.

    Returns:
        A list with ``globals.numberOfSequences`` entries, each produced by
        ``random.randint(0, globals.lengthOfSequences - globals.motifLength)``.
    """
    positions = []
    for x in range(0, globals.numberOfSequences):
        # Upper bound is lengthOfSequences - motifLength; presumably so that a
        # motif starting at the drawn position still fits in the sequence
        # (assumes the standard-library randint, whose bounds are inclusive).
        positions.append(random.randint(0, globals.lengthOfSequences - globals.motifLength))
    return positions


if __name__ == "__main__":
    print "\nRunning motif finder..."
    print "-----------------------" + '\n'
    # Record and report when the run started.
    startTime = datetime.datetime.now()
    print "start time = " + str(startTime) + '\n'
    # create array of motif length text files
    motifLengthFiles = directory.getFiles('motiflength.txt')
    sequencesFiles = directory.getFiles('sequences.fa')
    # The loop below is driven by the number of motif-length files and indexes
    # both lists with the same x -- assumes the two lists are equally long and
    # ordered so that matching entries share an index; verify getFiles.
    numberOfFiles = len(motifLengthFiles)
    # iterate over arrays
    informationContent = 0  # running sum of output[2] across all files
    for x in range(0, numberOfFiles):
        sequencesFile = sequencesFiles[x]
        motifLengthFile = motifLengthFiles[x]
        sequences = reader.readFastaFile(sequencesFile)
        motifLength = reader.readMotifLengthFile(motifLengthFile)
        # The first command-line argument is passed to findMotif as an int;
        # its meaning is defined by findMotif (not visible here).
        output = findMotif(sequences, motifLength, int(sys.argv[1]))
        informationContent += output[2]
        writer.writePredictions(output, sequencesFile, motifLength)
        print "files written for " + str(sequencesFile)
        # True on every tenth file (x is zero-based).
        if (x + 1) % 10 == 0: