def createCurrentState(self, previousState, currentState, row, buildType):
		"""Return the state to use for the current row.

		self.createRxState gets first say: when it returns something other
		than currentState, that value is returned unchanged. Otherwise the
		state is currentState prefixed with the result of
		utils.buildTransition(buildType, row[7], row[6], '', '').
		"""
		candidate = self.createRxState(previousState, currentState)
		if candidate != currentState:
			# createRxState rewrote the state; use its answer as-is.
			return candidate

		prefix = utils.buildTransition(buildType, row[7], row[6], '', '')
		return prefix + currentState
def predictNextState(goldFileList, transitionDictionary, emissionDictionary, filterOption):
	"""Yield one row per gold transition comparing it with the most likely next state.

	The first record of goldFileList only seeds the starting state and
	produces no output. For every later record the gold state is rebuilt as
	utils.buildTransition(filterOption, tup[3], tup[2], tup[4], tup[5]) + tup[0]
	and compared with the top-ranked successor of the previous state.

	Args:
		goldFileList: iterable of indexable records. tup[0] is the state and
			tup[1] the amount (converted with float); tup[2]..tup[5] are
			forwarded to utils.buildTransition. Only index 0 of the first
			record is read.
		transitionDictionary: mapping keyed by previous state.
		emissionDictionary: mapping keyed by "<previous>_<next>".
		filterOption: forwarded as the first argument of utils.buildTransition.

	Yields:
		[previous, current, predictedNext, goldAmount, randomAmount,
		frequentAmount, highestAmount, lowestAmount, isMatch]. The four
		predicted amounts are 0 when the emission key is unknown. When the
		previous state is missing from transitionDictionary the row is
		["error " + previous, current, "error" x 7].
	"""
	previous = ""

	for tup in goldFileList:
		# The first record only provides the starting state.
		if previous == "":
			previous = tup[0]
			continue

		current = utils.buildTransition(filterOption, tup[3], tup[2], tup[4], tup[5]) + tup[0]

		if previous not in transitionDictionary:
			# NOTE(review): `previous` is deliberately left untouched here to
			# preserve existing output, so every later record is also reported
			# against the same unknown state. Confirm this is intended.
			yield ["error " + previous, current, "error", "error", "error", "error", "error", "error", "error"]
			continue

		# Only the first element of the top-ranked entry is used; presumably
		# getHighestProb returns ranked (item, probability) pairs -- confirm.
		highestProbNext = getHighestProb(transitionDictionary, previous, 1)[0][0]

		emissionKey = previous + "_" + highestProbNext
		if emissionKey in emissionDictionary:
			frequentAmount = getHighestProb(emissionDictionary, emissionKey, 1)[0][0]
			randomAmount = getRandomAmount(emissionDictionary, emissionKey)
			highestAmount = getHighestAmount(emissionDictionary, emissionKey)
			lowestAmount = getLowestAmount(emissionDictionary, emissionKey)
			yield [previous, current, highestProbNext, float(tup[1]), randomAmount, frequentAmount, highestAmount, lowestAmount, (current == highestProbNext)]
		else:
			# No emission data for the predicted transition: report zero amounts.
			yield [previous, current, highestProbNext, float(tup[1]), 0, 0, 0, 0, (current == highestProbNext)]

		previous = current
def goldFileCheck(goldFileList, transitionDictionary, emissionDictionary, filterOption, maxToTake=100):
	"""Walk a gold sequence and total its amounts against the trained model.

	The first record only seeds the starting state. Each later record's state
	is rebuilt as
	utils.buildTransition(filterOption, tup[3], tup[2], tup[4], tup[5]) + tup[0].
	A step counts as known when the previous->current transition exists in
	transitionDictionary and "<previous>_<current>" exists in
	emissionDictionary; the first unknown step aborts the walk.

	Args:
		goldFileList: iterable of indexable records. tup[0] is the state and
			tup[1] the amount (converted with float); tup[2]..tup[5] are
			forwarded to utils.buildTransition. Only index 0 of the first
			record is read.
		transitionDictionary: mapping of previous state -> mapping of next states.
		emissionDictionary: mapping keyed by "<previous>_<next>".
		filterOption: forwarded as the first argument of utils.buildTransition.
		maxToTake: unused; kept so existing callers keep working.

	Yields:
		Exactly one row. On success:
		[path, goldAmount, randomAmount, trainAmount, trainLowest, trainHighest]
		where path is every visited state, each preceded by a space, cut to
		7500 characters. On the first unknown step:
		["<previous> <current>", "Error", "Error", "Error", "Error", "Error"].
	"""
	maxPathLength = 7500

	previous = ""
	visitedStates = []

	goldAmount = 0
	trainAmount = 0
	trainHighest = 0
	trainLowest = 0
	randomAmount = 0

	for tup in goldFileList:
		# handle start state
		if previous == "":
			previous = tup[0]
			visitedStates.append(previous)
			continue

		current = utils.buildTransition(filterOption, tup[3], tup[2], tup[4], tup[5]) + tup[0]
		visitedStates.append(current)
		amount = float(tup[1])
		emissionKey = previous + "_" + current

		isKnownStep = (previous in transitionDictionary
			and current in transitionDictionary[previous]
			and emissionKey in emissionDictionary)
		if not isKnownStep:
			# Report the failing step only; no totals are emitted after an error.
			yield ["%s %s" % (previous, current), "Error", "Error", "Error", "Error", "Error"]
			return

		goldAmount = goldAmount + amount
		trainAmount = trainAmount + float(getHighestProb(emissionDictionary, emissionKey, 1)[0][0])
		trainHighest = trainHighest + getHighestAmount(emissionDictionary, emissionKey)
		trainLowest = trainLowest + getLowestAmount(emissionDictionary, emissionKey)
		randomAmount = randomAmount + getRandomAmount(emissionDictionary, emissionKey)
		previous = current

	# Every state is prefixed with a space (so the path starts with one),
	# matching the historical output format.
	path = "".join(" " + state for state in visitedStates)
	yield [path[:maxPathLength], goldAmount, randomAmount, trainAmount, trainLowest, trainHighest]
def predictTrans(goldFileList, transitionDictionary, emissionDictionary, filterOption):
	"""Yield one row per gold transition comparing its probability with the best one.

	The first record of goldFileList only seeds the starting state and
	produces no output. For every later record the gold state is rebuilt as
	utils.buildTransition(filterOption, tup[3], tup[2], tup[4], tup[5]) + tup[0].

	Args:
		goldFileList: iterable of indexable records; tup[0] is the state and
			tup[2]..tup[5] are forwarded to utils.buildTransition. Only
			index 0 of the first record is read.
		transitionDictionary: mapping of previous state -> mapping of next
			state -> value (reported as-is in the output row).
		emissionDictionary: unused; kept so existing callers keep working.
		filterOption: forwarded as the first argument of utils.buildTransition.

	Yields:
		[previous, current, predictedNext,
		transitionDictionary[previous][current],
		transitionDictionary[previous][predictedNext], isMatch], or
		[previous, current, "error", "error", "error", "error"] when the
		gold transition is not present in transitionDictionary.
	"""
	previous = ""

	for tup in goldFileList:
		# The first record only provides the starting state.
		if previous == "":
			previous = tup[0]
			continue

		current = utils.buildTransition(filterOption, tup[3], tup[2], tup[4], tup[5]) + tup[0]

		isKnownTransition = (previous in transitionDictionary
			and current in transitionDictionary[previous])
		if not isKnownTransition:
			# NOTE(review): `previous` is deliberately left untouched here to
			# preserve existing output, so later records are still compared
			# against the last known state. Confirm this is intended.
			yield [previous, current, "error", "error", "error", "error"]
			continue

		# Only the first element of the top-ranked entry is used; presumably
		# getHighestProb returns ranked (item, probability) pairs -- confirm.
		highestProbNext = getHighestProb(transitionDictionary, previous, 1)[0][0]
		successors = transitionDictionary[previous]
		yield [previous, current, highestProbNext, successors[current], successors[highestProbNext], (current == highestProbNext)]
		previous = current
	def build(self, buildType):
		"""Read self.fileName as CSV and build emission/transition tables.

		The first CSV line is skipped as a header. Rows are read in file
		order; a change in (row[0], row[1]) -- member id and dependent id --
		starts a new sequence. For each new sequence self.determineDictionary
		chooses whether its transitions are recorded in the test or the train
		table; sequences flagged as test are also recorded in the gold
		standard.

		Each row's state is
		utils.buildTransition(buildType, row[7], row[6], '', '') + row[3],
		and its amount is str(float(row[4])).

		Args:
			buildType: forwarded as the first argument of utils.buildTransition.

		Returns:
			Tuple (emissionsProb, trainTransitionsProb, testTransitionsProb,
			goldStandard). The first three are self.buildDict applied to the
			raw tables. goldStandard maps row[0] + row[1] to a list starting
			with (utils.startState, 0) followed by one
			(state, amount, row[6], row[7], '', '') tuple per row.
		"""
		# read all of a member's claims in at once
		# NewMemberID, CPTCode
		currentMemberId = ""
		currentDependentId = ""

		# CPT -> cost
		emissions = {}

		# CPT -> CPT
		testTransitions = {}
		trainTransitions = {}

		# gold standard for testing
		goldStandard = {}

		# initial one will be thrown away
		transitions = {}

		isTest = False

		previousCptCode = utils.startState

		# 'rb' follows the Python 2 csv convention used by this file. The
		# context manager guarantees the handle is closed even if a row fails
		# to parse.
		with open(self.fileName, 'rb') as csvFile:
			csv_file_object = csv.reader(csvFile)
			# Skip the header row; the default keeps an empty file from
			# raising StopIteration.
			next(csv_file_object, None)

			for row in csv_file_object:
				rowMemberId = row[0]
				dependentId = row[1]
				rawCode = row[3]

				currentCptCode = utils.buildTransition(buildType, row[7], row[6], '', '') + rawCode
				totalAmount = str(float(row[4]))

				# NOTE(review): ids are concatenated without a separator, so
				# distinct (member, dependent) pairs could share a key.
				goldKey = rowMemberId + dependentId

				isNewSequence = rowMemberId != currentMemberId or dependentId != currentDependentId
				if isNewSequence:
					# Close the previous sequence with the end state.
					# NOTE(review): on the very first row this writes a
					# start->end entry; the transition lands in the throwaway
					# dict but the emission entry stays in `emissions`.
					# NOTE(review): the end-state emission uses the amount of
					# the new sequence's first row, and the last sequence in
					# the file never receives an end transition. Behaviour is
					# kept as-is; confirm whether it is intended.
					self.setDict(transitions, previousCptCode, utils.endState)
					self.setDict(emissions, previousCptCode + "_" + utils.endState, totalAmount)

					(transitions, isTest) = self.determineDictionary(testTransitions, trainTransitions)

					# The new sequence begins from the start state.
					previousCptCode = utils.startState
					currentMemberId = rowMemberId
					currentDependentId = dependentId

				self.setDict(transitions, previousCptCode, currentCptCode)
				self.setDict(emissions, previousCptCode + "_" + currentCptCode, totalAmount)

				if isTest:
					if isNewSequence:
						goldStandard[goldKey] = [(utils.startState, 0)]
					goldStandard[goldKey].append((currentCptCode, totalAmount, row[6], row[7], '', ''))

				previousCptCode = currentCptCode

		# create probabilities out of these now
		emissionsProb = self.buildDict(emissions)
		trainTransitionsProb = self.buildDict(trainTransitions)
		testTransitionsProb = self.buildDict(testTransitions)

		return (emissionsProb, trainTransitionsProb, testTransitionsProb, goldStandard)
 def buildTransitionWrapper(gender, year, state):
     """Return `state` prefixed with the utils.ageGender transition for gender/year.

     Calls utils.buildTransition(utils.ageGender, gender, year, "", "") and
     prepends its result to `state`.
     """
     prefix = utils.buildTransition(utils.ageGender, gender, year, "", "")
     return prefix + state