def evaluate1(dailyStates, size=1000, basepath='alllogs/'):
    """Print transition counts of generated days next to those of logged days.

    Each element of ``dailyStates`` is passed to ``countTransitions`` to
    build the "generated" distribution.  The "test" distribution is built
    from log files found one directory level below ``basepath``: each file
    is parsed with ``random_generator.parseEntry``, converted with
    ``statesToPeriod`` and counted with ``countTransitions``.

    Args:
        dailyStates: Sized iterable of per-day state collections
            (exact element type is whatever ``countTransitions`` accepts).
        size: Required length of ``dailyStates`` and the maximum number of
            log files contributing to the test distribution.
        basepath: Directory whose sub-directories contain the log files.

    Returns:
        None.  Both distributions are only printed.

    Raises:
        AssertionError: If ``len(dailyStates) != size``.
    """
    print("length of dailyStates, %s" % len(dailyStates))
    print("size:  %s" % size)
    assert len(dailyStates) == size

    distributionGenerated = [countTransitions(dayStates) for dayStates in dailyStates]
    distributionTest = []

    # Number of log files that may still be added to the test distribution.
    # It is checked *before* any file is touched, so size == 0 reads nothing
    # (decrementing first and testing ``== 0`` afterwards would skip past
    # zero and walk through every log on disk).
    remaining = size

    for entry in os.listdir(basepath):
        if remaining <= 0:
            break
        path = os.path.join(basepath, entry)
        if not os.path.isdir(path):
            continue
        for logFile in os.listdir(path):
            if remaining <= 0:
                break
            # parseEntry is handed the directory and file name, so no file
            # handle is opened here.
            states = random_generator.parseEntry(path, logFile)
            if states is None:
                # Other code in this file guards against a None result from
                # parseEntry; such logs are skipped and do not use up quota.
                continue
            remaining -= 1
            distributionTest.append(countTransitions(statesToPeriod(states)))

    print("distributionGenerated: ")
    print(distributionGenerated)
    print("\n distributionTest: ")
    print(distributionTest)
    return
def evaluate1(trainingSetNumTransitions, dailyStates, size=10000, basepath='../../../alllogs/'):
    """Compare daily transition counts of test, generated and training data.

    Builds the "generated" distribution by applying ``countTransitions`` to
    every element of ``dailyStates``; builds the "test" distribution from a
    random sample of ``size`` log files found one directory level below
    ``basepath``.  Both distributions and ``trainingSetNumTransitions`` are
    pickled to the working directory and drawn as overlaid histograms.

    Args:
        trainingSetNumTransitions: Sequence of transition counts for the
            training set; must have ``size`` elements.
        dailyStates: Sized iterable of per-day state collections (element
            type is whatever ``countTransitions`` accepts); must have
            ``size`` elements.
        size: Required length of both inputs and number of log files sampled.
        basepath: Directory whose sub-directories contain the log files.

    Returns:
        None.  Side effects: prints both distributions, writes
        ``dtest.pickle``, ``dgenerated.pickle``, ``trainingset.pickle`` and
        ``Markov On All.png``, and opens the plot window.

    Raises:
        AssertionError: If either input does not have ``size`` elements.
        ValueError: From ``random.sample`` if fewer than ``size`` log files
            exist, or from ``min``/``max`` if a distribution is empty.
    """
    print("length of dailyStates, %s" % len(dailyStates))
    print("size:  %s" % size)
    assert len(dailyStates) == size
    assert len(trainingSetNumTransitions) == size

    distributionGenerated = [countTransitions(dayStates) for dayStates in dailyStates]

    # Collect every (directory, file name) pair one level below basepath.
    listOfFileNames = []
    for entry in os.listdir(basepath):
        path = os.path.join(basepath, entry)
        if os.path.isdir(path):
            for logFile in os.listdir(path):
                listOfFileNames.append([path, logFile])

    # Sample the entries directly so that every file, including the first
    # one listed, can be drawn (sampling indices from range(1, n) would
    # permanently exclude index 0).
    filesForTesting = random.sample(listOfFileNames, size)

    distributionTest = []
    for path, logFile in filesForTesting:
        # parseEntry is handed the directory and file name, so no file
        # handle is opened here.
        states = random_generator.parseEntry(path, logFile)
        if states is not None:
            distributionTest.append(countTransitions(statesToPeriod(states)))

    print("distributionGenerated: ")
    print(distributionGenerated)
    print("\n distributionTest: ")
    print(distributionTest)
    print(len(distributionGenerated))
    print(len(distributionTest))

    # The bin edges span all three plotted series; values falling outside
    # the edges are not counted by the histogram, so leaving the training
    # set out of the range could silently drop some of its days.
    plotted = (distributionTest, distributionGenerated, trainingSetNumTransitions)
    lowest = min(min(series) for series in plotted)
    highest = max(max(series) for series in plotted)
    bins = np.linspace(lowest, highest, 50)

    # Persist the raw numbers; ``with`` closes each file once it is written.
    for payload, target in ((distributionTest, "dtest.pickle"),
                            (distributionGenerated, "dgenerated.pickle"),
                            (trainingSetNumTransitions, "trainingset.pickle")):
        with open(target, "wb") as handle:
            pickle.dump(payload, handle)

    plt.hist(distributionTest, bins, alpha=0.7, label='Test Set')
    plt.hist(distributionGenerated, bins, alpha=0.7, label='Markov Generated')
    plt.hist(trainingSetNumTransitions, bins, alpha=0.7, label="Training Set")
    plt.legend(loc='upper right')
    plt.title("Distribution of Daily Number of Transitions")
    plt.xlabel("Number Transitions")
    plt.ylabel("Days")
    plt.savefig("Markov On All.png")
    plt.show()
    return
if os.path.isdir(path): for logFile in os.listdir(path): # logFile contains ID print logFile try: cluster = int(idToCluster.loc[logFile]['cluster']) except KeyError: continue print "Cluster for " + logFile + " is " + str(cluster) + "\n" if limit == 0: finished = True break states = random_generator.parseEntry(path, logFile) if states != None: # print states periods = statesToPeriod(states) transitionMatrix = computeTransitionMatrix(periods) numTrans = countTransitions(periods) trainingSetNumTransitions.append(numTrans) totalTransitionMatrix = totalTransitionMatrix + transitionMatrix if cluster == 1: cluster2Matrix += transitionMatrix if cluster == 0: cluster1Matrix += transitionMatrix if cluster == 2:
import os
import random

import pymc3 as pm

import random_generator

path = "fakeData"

# Print the parsed contents of every non-hidden file in the fake-data folder.
# NOTE: ``states`` is rebound further down to the list of model variables.
for filename in os.listdir(path):
    if filename.startswith('.'):
        continue
    states = random_generator.parseEntry(path, filename)
    print(states)

# define the system and the data
true_pos, true_vel = 0, .7
true_positions = [true_vel * step for step in range(100)]

# we're using `some_tau` for the noise throughout the example.
# this should be replaced with something more meaningful.
some_tau = 1 / .5**2

# pymc3 random variables can only be instantiated while a model is active on
# the context stack, so the whole definition lives inside ``with pm.Model()``.
with pm.Model() as model:
    # PRIORS
    # we don't know too much about the velocity, might be pos. or neg.
    vel = pm.Normal("vel", mu=0, tau=some_tau)

    # MODEL
    # next_state = prev_state + vel (and some gaussian noise)
    # That means that each state depends on the prev_state and the vel.
    # We save the states in a list.
    states = [pm.Normal("s0", mu=true_positions[0], tau=some_tau)]
    for i in range(1, len(true_positions)):
        states.append(
            pm.Normal(name="s" + str(i), mu=states[-1] + vel, tau=some_tau))
except Exception, e: print e parse = False if parse: for files in os.listdir(basepath): if finished == True: break path = os.path.join(basepath, files) if os.path.isdir(path): for logFile in os.listdir(path): if limit == 0: finished = True break states = random_generator.parseEntry(path, logFile) # print states periods = statesToPeriod(states) transitionMatrix = computeTransitionMatrix(periods) totalTransitionMatrix = totalTransitionMatrix + transitionMatrix limit -= 1 # print limit # print totalTransitionMatrix # print "Training finished." normed_matrix = computeProbabilityMatrix(totalTransitionMatrix) #print normed_matrix
import random_generator import pymc3 as pm import os import random path = "fakeData" for filename in filter( lambda f: not f.startswith('.'), os.listdir(path)): states = random_generator.parseEntry(path, filename) print states # define the system and the data true_pos, true_vel = 0, .7 true_positions = [true_vel * step for step in range(100)] # we're using `some_tau` for the noise throughout the example. # this should be replaced with something more meaningful. some_tau = 1 / .5**2 # PRIORS # we don't know too much about the velocity, might be pos. or neg. vel = pm.Normal("vel", mu=0, tau=some_tau) # MODEL # next_state = prev_state + vel (and some gaussian noise) # That means that each state depends on the prev_state and the vel. # We save the states in a list. states = [pm.Normal("s0", mu=true_positions[0], tau=some_tau)] for i in range(1, len(true_positions)): states.append(pm.Normal(name="s" + str(i),