# Build Targets
# Progress banner for the stage further down that turns the speeches into a
# target vector and a vectorized data matrix.
print("Build target vector and data vector from documents")


def party_fn(speech):
    """Map a speech's party to a binary classification target.

    Args:
        speech: object exposing ``speaker_party`` and ``speech_id``.

    Returns:
        1 when ``speaker_party`` is 'D', 0 when it is 'R'.

    Raises:
        ValueError: if ``speaker_party`` is anything other than 'D' or 'R'.
            ValueError subclasses Exception, so callers that already catch
            ``Exception`` keep working.
    """
    party = speech.speaker_party
    if party == 'D':
        return 1
    if party == 'R':
        return 0
    # Fail loudly rather than silently mislabel a third-party speech.
    raise ValueError("Speech must have party 'D' or 'R': " +
                     str(speech.speech_id))


# Pair every speech with its party label (party_fn: 1 for 'D', 0 for 'R').
bunch = Classifier.bunch_with_targets(speeches=speeches,
                                      target_function=party_fn)
# Learn the vocabulary from the documents and vectorize them in one call.
# The result is used exactly as returned (no .tocsr() / .toarray() conversion).
data = vectorizer.fit_transform(bunch.data)

# Report the learned vocabulary and the vectorized documents.
# NOTE(review): if `vectorizer` is a scikit-learn vectorizer, be aware that
# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); left unchanged here so the script keeps working
# with the library version it was written against -- confirm before upgrading.
learned_vocabulary = vectorizer.get_feature_names()
print("Learned %d words in vocabulary" % len(learned_vocabulary))
print(learned_vocabulary)
print("")
print("Sparse Matrix of TfIdf Values of each term for each document")
# Call form, matching the other prints in this section: prints the same text
# on Python 2 and, unlike the statement form, is valid syntax on Python 3.
print(data)

# `array` is defined outside this excerpt (presumably numpy.array -- confirm).
target = array(bunch.target)

print("")

# Run Cross Validation Checks
	# Report how many speeches are in dem_speeches (built outside this excerpt).
	print "%d democratic speeches" % len(dem_speeches)

	# Debugger breakpoint, left disabled.
	# ipdb.set_trace()

	# Disabled hand-rolled Naive Bayes computation, kept for reference only;
	# nothing below depends on it. As written it multiplies each class prior by
	# the summed per-word values, and its lambdas use tuple parameters, which
	# are Python 2-only syntax.
	# bayseian_prior_a_rep = len(rep_speeches) / len(speeches)
	# bayseian_prior_b_dem = len(dem_speeches) / len(speeches)
	# this frame vocabulary proba has tuples for the proba of class a and b
	# frame_vocabulary_proba =  { word: vocabulary_proba[word] if vocabulary_proba.get(word) != None else [0, 0] for word in frame.word_string.split() }
	# sum_log_probability_a_rep = sum(map(lambda (word,log_probabilities): log_probabilities[0],frame_vocabulary_proba.items()))
	# sum_log_probability_b_dem = sum(map(lambda (word,log_probabilities): log_probabilities[1],frame_vocabulary_proba.items()))
	# final_prob_a = bayseian_prior_a_rep * sum_log_probability_a_rep
	# final_prob_b = bayseian_prior_b_dem * sum_log_probability_b_dem

	# Announce which frame, window and phrase are about to be classified.
	print "Recompute Naieve Bayes Output For Classifying Frame (%s) Within Window (%s) for phrase %s" % (frame.seed_word, speech_window_key, analysis.phrase)
	# Build a classifier whose vocab is the whitespace-split words of the frame.
	naive_bayes = Classifier(vocab=frame.word_string.split())
	# Label every speech with analysis.target_function2 and train on the result.
	training_set = Classifier.bunch_with_targets(speeches, analysis.target_function2)
	naive_bayes.train_classifier(training_set.data, training_set.target)
	# Classify the frame text; index 0 is reported below as "A (Rep)" and
	# index 1 as "B (Dem)".
	probabilities = naive_bayes.classify_document(frame.word_string)

	# Vectorize the frame text as a one-document batch so the underlying
	# classifier can be queried directly.
	tfidf_frames_vector = naive_bayes.vectorizer.transform([frame.word_string])

	# [0] picks the result for that single document (presumably the classifier
	# returns one entry per input document -- confirm against its API).
	print "Predicted Class: ", naive_bayes.classifier.predict(tfidf_frames_vector)[0]
	print "Predict Proba: ", naive_bayes.classifier.predict_proba(tfidf_frames_vector)[0]

	print "Probability A (Rep): ", probabilities[0]
	print "Probability B (Dem): ", probabilities[1]

	# Strict comparison: a tie falls through to the else branch and is
	# reported as Democratic.
	# `t` is defined outside this excerpt (presumably a terminal-colour
	# helper -- confirm).
	if probabilities[0] > probabilities[1]:
		print t.red("A (Rep) NB Proba > B (Dem) NB Proba: Classify Republican")
	else:
		print t.cyan("B (Dem) NB Proba > A (Rep) NB Proba: Classify Democratic")