Esempio n. 1
0
def evaluate1(dailyStates, size=1000, basepath='alllogs/'):
    """Print transition counts of generated days next to those of log files.

    Every entry of ``dailyStates`` is passed to ``countTransitions``.  Up to
    ``size`` log files found in the immediate sub-directories of ``basepath``
    are parsed with ``random_generator.parseEntry``, converted with
    ``statesToPeriod`` and counted the same way.  Both lists of counts are
    printed; nothing is returned.

    Args:
        dailyStates: sequence whose length must equal ``size``; each item is
            handed to ``countTransitions``.
        size: number of log files to read from ``basepath``.
        basepath: directory whose sub-directories contain the log files.

    Raises:
        AssertionError: if ``len(dailyStates)`` differs from ``size``.
    """
    # Parenthesised single-argument prints emit the same text as the former
    # print statements on Python 2 and also parse on Python 3.
    print("length of dailyStates, %s" % (len(dailyStates),))
    print("size:  %s" % (size,))
    assert len(dailyStates) == size

    distributionGenerated = [countTransitions(dayStates) for dayStates in dailyStates]
    distributionTest = []

    # Number of log files that may still be read.  Testing "<= 0" before each
    # file (instead of "== 0" after it) makes size=0 read nothing at all.
    remaining = size

    for entry in os.listdir(basepath):
        if remaining <= 0:
            break
        path = os.path.join(basepath, entry)
        if not os.path.isdir(path):
            continue

        for logFile in os.listdir(path):
            if remaining <= 0:
                break
            # parseEntry receives the directory and file name itself, so no
            # file handle needs to be opened here.
            states = random_generator.parseEntry(path, logFile)
            if states is None:
                # Other callers guard against parseEntry returning None; do
                # the same here so such entries are skipped and not counted.
                continue
            distributionTest.append(countTransitions(statesToPeriod(states)))
            remaining -= 1

    print("distributionGenerated: ")
    print(distributionGenerated)
    print("\n distributionTest: ")
    print(distributionTest)

    return
Esempio n. 2
0
def evaluate1(trainingSetNumTransitions, dailyStates, size=10000, basepath='../../../alllogs/'):
    """Compare daily transition counts of generated, test and training data.

    Every entry of ``dailyStates`` is counted with ``countTransitions``.
    ``size`` log files are then drawn at random from the immediate
    sub-directories of ``basepath``, parsed with
    ``random_generator.parseEntry``, converted with ``statesToPeriod`` and
    counted the same way; files for which ``parseEntry`` returns ``None`` are
    left out.  The generated and test counts are printed, all three lists are
    pickled to ``dtest.pickle``, ``dgenerated.pickle`` and
    ``trainingset.pickle`` (relative to the working directory), and overlaid
    histograms are saved to ``Markov On All.png`` and shown.

    Args:
        trainingSetNumTransitions: transition counts of the training set; its
            length must equal ``size``.
        dailyStates: generated days; its length must equal ``size``.
        size: number of log files to sample for the test distribution.
        basepath: directory whose sub-directories contain the log files.

    Raises:
        AssertionError: if either input does not have ``size`` elements.
        ValueError: from ``random.sample`` when fewer than ``size`` log files
            are available.
    """
    # Parenthesised single-argument prints emit the same text as the former
    # print statements on Python 2 and also parse on Python 3.
    print("length of dailyStates, %s" % (len(dailyStates),))
    print("size:  %s" % (size,))
    assert len(dailyStates) == size
    assert len(trainingSetNumTransitions) == size

    distributionGenerated = [countTransitions(dayStates) for dayStates in dailyStates]

    # Collect every [directory, file name] pair one level below basepath.
    listOfFileNames = []
    for entry in os.listdir(basepath):
        path = os.path.join(basepath, entry)
        if os.path.isdir(path):
            for logFile in os.listdir(path):
                listOfFileNames.append([path, logFile])

    # Sample the list itself: drawing indices from range(1, len(...)) could
    # never select the first file and failed when exactly `size` files exist.
    filesForTesting = random.sample(listOfFileNames, size)

    distributionTest = []
    for path, logFile in filesForTesting:
        # parseEntry receives the directory and file name itself, so no file
        # handle needs to be opened here.
        states = random_generator.parseEntry(path, logFile)
        if states is not None:
            distributionTest.append(countTransitions(statesToPeriod(states)))

    print("distributionGenerated: ")
    print(distributionGenerated)
    print("\n distributionTest: ")
    print(distributionTest)

    print(len(distributionGenerated))
    print(len(distributionTest))

    # NOTE(review): the bin range ignores trainingSetNumTransitions, so
    # training values outside it are not drawn - confirm this is intended.
    lowest = min(min(distributionGenerated), min(distributionTest))
    highest = max(max(distributionGenerated), max(distributionTest))
    bins = np.linspace(lowest, highest, 50)

    # Context managers make sure each pickle file is flushed and closed.
    dumps = (
        ("dtest.pickle", distributionTest),
        ("dgenerated.pickle", distributionGenerated),
        ("trainingset.pickle", trainingSetNumTransitions),
    )
    for fileName, payload in dumps:
        with open(fileName, "wb") as handle:
            pickle.dump(payload, handle)

    plt.hist(distributionTest, bins, alpha=0.7, label='Test Set')
    plt.hist(distributionGenerated, bins, alpha=0.7, label='Markov Generated')
    plt.hist(trainingSetNumTransitions, bins, alpha=0.7, label="Training Set")

    plt.legend(loc='upper right')
    plt.title("Distribution of Daily Number of Transitions")

    plt.xlabel("Number Transitions")
    plt.ylabel("Days")

    plt.savefig("Markov On All.png")

    plt.show()
    return
Esempio n. 3
0
			if os.path.isdir(path):
				for logFile in os.listdir(path):
					# logFile contains ID
					print logFile
					try:
						cluster = int(idToCluster.loc[logFile]['cluster'])
					except KeyError:
						continue
					
					print "Cluster for " + logFile + " is " + str(cluster) + "\n"

					if limit == 0:
						finished = True
						break

					states = random_generator.parseEntry(path, logFile)
					if states != None:

					# print states

						periods = statesToPeriod(states)
						transitionMatrix = computeTransitionMatrix(periods)
						numTrans = countTransitions(periods)
						trainingSetNumTransitions.append(numTrans)

						totalTransitionMatrix = totalTransitionMatrix + transitionMatrix
						if cluster == 1:
							cluster2Matrix += transitionMatrix
						if cluster == 0:
							cluster1Matrix += transitionMatrix
						if cluster == 2:
Esempio n. 4
0
import random_generator
import pymc3 as pm
import os
import random

path = "fakeData"
# Print the parsed states of every entry in the data directory whose name
# does not start with a dot.
for filename in os.listdir(path):
    if filename.startswith('.'):
        continue
    states = random_generator.parseEntry(path, filename)
    # Parenthesised single-argument print: same output on Python 2, and it
    # also parses on Python 3.
    print(states)

# define the system and the data
true_pos, true_vel = 0, .7
true_positions = [true_vel * step for step in range(100)]

# we're using `some_tau` for the noise throughout the example.
# this should be replaced with something more meaningful.
some_tau = 1 / .5**2

# PyMC3 distributions can only be instantiated while a model is on the
# context stack; creating them at module level raises TypeError.
with pm.Model() as model:
    # PRIORS
    # we don't know too much about the velocity, might be pos. or neg.
    vel = pm.Normal("vel", mu=0, tau=some_tau)

    # MODEL
    # next_state = prev_state + vel (and some gaussian noise)
    # That means that each state depends on the prev_state and the vel.
    # We save the states in a list (this rebinds `states` from the loop above).
    states = [pm.Normal("s0", mu=true_positions[0], tau=some_tau)]
    for i in range(1, len(true_positions)):
        states.append(
            pm.Normal(name="s" + str(i), mu=states[-1] + vel, tau=some_tau))
Esempio n. 5
0
	except Exception, e:
		print e
		parse = False
	
	if parse:
		for files in os.listdir(basepath):
			if finished == True:
				break
			path = os.path.join(basepath, files)
			if os.path.isdir(path):
				for logFile in os.listdir(path):
					if limit == 0:
						finished = True
						break

					states = random_generator.parseEntry(path, logFile)
					# print states

					periods = statesToPeriod(states)
					transitionMatrix = computeTransitionMatrix(periods)

					totalTransitionMatrix = totalTransitionMatrix + transitionMatrix
					limit -= 1
					# print limit
					# print totalTransitionMatrix

	# print "Training finished."
	normed_matrix = computeProbabilityMatrix(totalTransitionMatrix)
	#print normed_matrix

Esempio n. 6
0
import random_generator
import pymc3 as pm
import os
import random

path = "fakeData"
# Print the parsed states of every directory entry whose name does not
# start with a dot.
for filename in os.listdir(path):
    if filename.startswith('.'):
        continue
    states = random_generator.parseEntry(path, filename)
    print(states)


# Ground truth for the example: positions advance by true_vel per step.
true_pos = 0
true_vel = .7
true_positions = [true_vel * step for step in range(100)]

# `some_tau` is the precision used for all noise terms in this example;
# it is a placeholder that should be replaced with something meaningful.
some_tau = 1 / (.5 ** 2)

# PRIORS
# Little is known about the velocity; it may be positive or negative.
# NOTE(review): pymc3 normally expects distributions to be created inside a
# `with pm.Model():` block - confirm this runs as written.
vel = pm.Normal("vel", mu=0, tau=some_tau)

# MODEL
# next_state = prev_state + vel (plus gaussian noise), so every state depends
# on its predecessor and on vel.  The states are collected in a list, which
# rebinds `states` from the loop above.
states = []
states.append(pm.Normal("s0", mu=true_positions[0], tau=some_tau))
for i in range(1, len(true_positions)):
    states.append(pm.Normal(name="s" + str(i),