def __init__(self):
    Confusion.__init__(self)
    self.symbolicExec = None
    self.flaRecover = None
import glob  # needed for the file-pattern matching below


def tenFoldData(k, positive_training_size, negative_training_size,
                prior_positive_fold_probability, prior_negative_fold_probability):
    """Run k-fold (10-fold) cross validation over the review polarity data set.

    Args:
        k (int): Number of folds.
        positive_training_size (int): Training-set size for the positive reviews in each fold.
        negative_training_size (int): Training-set size for the negative reviews in each fold.
        prior_positive_fold_probability (float): Prior probability of a positive review in each fold.
        prior_negative_fold_probability (float): Prior probability of a negative review in each fold.

    Returns:
        None: Prints the aggregated confusion-matrix counts and the mean accuracy
        over the k folds.
    """
    total_tp = []
    total_fp = []
    total_tn = []
    total_fn = []
    total_accuracy = []
    for f in xrange(0, k):
        confusion_matrix = Confusion()

        # Training files: every positive review whose fold digit is not f.
        positive_training_list = glob.glob(
            'review_polarity/txt_sentoken/pos/cv[!' + str(f) + ']*.txt')
        new_positive_training_list = sentimentList(positive_training_list)
        positive_sum_training_list = sentimentSumList(new_positive_training_list)
        positive_probability_training_list = sentimentProbabilityList(
            positive_sum_training_list, positive_training_size)

        # Test files: the positive reviews belonging to fold f.
        positive_testing_list = glob.glob(
            'review_polarity/txt_sentoken/pos/cv' + str(f) + '*.txt')
        new_positive_testing_list = sentimentList(positive_testing_list)

        negative_training_list = glob.glob(
            'review_polarity/txt_sentoken/neg/cv[!' + str(f) + ']*.txt')
        new_negative_training_list = sentimentList(negative_training_list)
        negative_sum_training_list = sentimentSumList(new_negative_training_list)
        negative_probability_training_list = sentimentProbabilityList(
            negative_sum_training_list, negative_training_size)

        negative_testing_list = glob.glob(
            'review_polarity/txt_sentoken/neg/cv' + str(f) + '*.txt')
        new_negative_testing_list = sentimentList(negative_testing_list)

        for positive_vector in new_positive_testing_list:
            polarity = naiveBayesClassifier(positive_vector,
                                            prior_positive_fold_probability,
                                            prior_negative_fold_probability,
                                            positive_probability_training_list,
                                            negative_probability_training_list)
            if polarity:
                confusion_matrix.incrementTP()
            else:
                # A positive review classified as negative is a false negative.
                confusion_matrix.incrementFN()

        for negative_vector in new_negative_testing_list:
            polarity = naiveBayesClassifier(negative_vector,
                                            prior_positive_fold_probability,
                                            prior_negative_fold_probability,
                                            positive_probability_training_list,
                                            negative_probability_training_list)
            if polarity:
                # A negative review classified as positive is a false positive.
                confusion_matrix.incrementFP()
            else:
                confusion_matrix.incrementTN()

        total_tp.append(confusion_matrix.getTP())
        total_fp.append(confusion_matrix.getFP())
        total_tn.append(confusion_matrix.getTN())
        total_fn.append(confusion_matrix.getFN())
        accuracy = (confusion_matrix.getTP() + confusion_matrix.getTN()) / float(
            confusion_matrix.getTP() + confusion_matrix.getFP() +
            confusion_matrix.getTN() + confusion_matrix.getFN())
        total_accuracy.append(accuracy)

    print "True Positive: " + str(sum(total_tp))
    print "False Positive: " + str(sum(total_fp))
    print "True Negative: " + str(sum(total_tn))
    print "False Negative: " + str(sum(total_fn))
    print "10-Fold Cross Validation Method"
    print "Accuracy: " + str(sum(total_accuracy) / k)
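# tenFoldData above and completeData below both depend on a `Confusion` counter
# class whose definition does not appear in this section. The class below is a
# minimal sketch of the assumed interface (incrementTP/FP/TN/FN plus the matching
# getters), not the project's actual implementation.
class Confusion(object):
    """Minimal confusion-matrix counter (assumed interface)."""

    def __init__(self):
        self.tp = 0
        self.fp = 0
        self.tn = 0
        self.fn = 0

    def incrementTP(self):
        self.tp += 1

    def incrementFP(self):
        self.fp += 1

    def incrementTN(self):
        self.tn += 1

    def incrementFN(self):
        self.fn += 1

    def getTP(self):
        return self.tp

    def getFP(self):
        return self.fp

    def getTN(self):
        return self.tn

    def getFN(self):
        return self.fn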
def completeData(positive_review_count, negative_review_count, new_positive_list,
                 new_negative_list, prior_positive_probability, prior_negative_probability):
    """Train and test on the complete data set.

    Args:
        positive_review_count (int): Total number of positive reviews.
        negative_review_count (int): Total number of negative reviews.
        new_positive_list (list): Binary feature vectors (list of lists) for the positive reviews.
        new_negative_list (list): Binary feature vectors (list of lists) for the negative reviews.
        prior_positive_probability (float): Prior probability of a positive review.
        prior_negative_probability (float): Prior probability of a negative review.

    Returns:
        None: Uses the complete data set as both the training and the testing set,
        then prints the resulting confusion matrix and accuracy.
    """
    negative_sum_list = sentimentSumList(new_negative_list)
    negative_probability_list = sentimentProbabilityList(
        negative_sum_list, negative_review_count)
    positive_sum_list = sentimentSumList(new_positive_list)
    positive_probability_list = sentimentProbabilityList(
        positive_sum_list, positive_review_count)
    print positive_probability_list
    print negative_probability_list

    confusion_matrix = Confusion()
    for positive_vector in new_positive_list:
        polarity = naiveBayesClassifier(positive_vector,
                                        prior_positive_probability,
                                        prior_negative_probability,
                                        positive_probability_list,
                                        negative_probability_list)
        if polarity:
            confusion_matrix.incrementTP()
        else:
            # A positive review classified as negative is a false negative.
            confusion_matrix.incrementFN()

    for negative_vector in new_negative_list:
        polarity = naiveBayesClassifier(negative_vector,
                                        prior_positive_probability,
                                        prior_negative_probability,
                                        positive_probability_list,
                                        negative_probability_list)
        if polarity:
            # A negative review classified as positive is a false positive.
            confusion_matrix.incrementFP()
        else:
            confusion_matrix.incrementTN()

    accuracy = (confusion_matrix.getTP() + confusion_matrix.getTN()) / float(
        confusion_matrix.getTP() + confusion_matrix.getFP() +
        confusion_matrix.getTN() + confusion_matrix.getFN())
    print "True Positive: " + str(confusion_matrix.getTP())
    print "False Positive: " + str(confusion_matrix.getFP())
    print "True Negative: " + str(confusion_matrix.getTN())
    print "False Negative: " + str(confusion_matrix.getFN())
    print "Complete Data Method"
    print "Accuracy: " + str(accuracy)
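# Both functions above call a `naiveBayesClassifier` helper that is not shown in
# this section. The sketch below is one plausible Bernoulli Naive Bayes decision
# rule that matches the call signature used above (binary feature vector, the two
# class priors, and the per-class per-feature probability lists); the project's
# actual implementation may differ, e.g. in smoothing or in how zero
# probabilities are handled.
import math


def naiveBayesClassifier(vector, prior_positive, prior_negative,
                         positive_probability_list, negative_probability_list):
    """Return True when the positive-class log posterior is at least as large."""
    positive_score = math.log(prior_positive)
    negative_score = math.log(prior_negative)
    for feature, p_pos, p_neg in zip(vector, positive_probability_list,
                                     negative_probability_list):
        if feature:
            positive_score += math.log(p_pos)
            negative_score += math.log(p_neg)
        else:
            positive_score += math.log(1.0 - p_pos)
            negative_score += math.log(1.0 - p_neg)
    return positive_score >= negative_score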
X_train, X_test, y_train, y_test = train_test_split(
    synthetic[['x1', 'x2']], synthetic[['y']],
    test_size=test_size, stratify=synthetic[['y']])
# scatter_plot(X_train, y_train, 'Train set')
# scatter_plot(X_test, y_test, 'Test set')

print('GaussianBayes:')
bc = GaussianBayesClassifier()
bc.fit(X_train, y_train)
pred = bc.evaluate(X_test)
real = list(y_test['y'])
confusion = Confusion.from_wrong_preds(['A1', 'A2'], pred, real, {'A1': 50, 'A2': 50})
print(confusion)

print('ParzenBayes:')
pc = ParzenBayesClassifier()
pc.fit(X_train, y_train)
pred = pc.evaluate(X_test)
real = list(y_test['y'])
confusion = Confusion.from_wrong_preds(['A1', 'A2'], pred, real, {'A1': 50, 'A2': 50})
print(confusion)

#####################################################################################
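# The script above assumes a pandas DataFrame named `synthetic` with feature
# columns 'x1'/'x2' and a label column 'y' holding the two classes 'A1' and 'A2'
# (50 samples each), plus a `test_size` fraction for sklearn's train_test_split.
# GaussianBayesClassifier, ParzenBayesClassifier, and Confusion.from_wrong_preds
# are project-specific and are not reproduced here. The sketch below only builds
# a data set of that shape from two 2-D Gaussians; the means and covariances are
# illustrative assumptions, not the project's actual generator.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
class_a1 = rng.multivariate_normal([0.0, 0.0], [[1.0, 0.0], [0.0, 1.0]], size=50)
class_a2 = rng.multivariate_normal([2.0, 2.0], [[1.0, 0.0], [0.0, 1.0]], size=50)
synthetic = pd.DataFrame(np.vstack([class_a1, class_a2]), columns=['x1', 'x2'])
synthetic['y'] = ['A1'] * 50 + ['A2'] * 50
test_size = 0.3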