def __init__(self):
    """Run Confusion's initialiser on this instance and clear both slots.

    After construction ``symbolicExec`` and ``flaRecover`` are both None.
    """
    Confusion.__init__(self)
    # Neither attribute has a value yet; both start out as None.
    self.symbolicExec = self.flaRecover = None
def tenFoldData(k, positive_training_size, negative_training_size, prior_positive_fold_probability, prior_negative_fold_probability):
    """Run k-fold cross validation of the naive Bayes classifier and print the results.

    For every fold number ``f`` in ``range(k)`` the review files matched by
    ``cv<f>*.txt`` are held out for testing and the files matched by
    ``cv[!<f>]*.txt`` are used for training. The confusion counts of all
    folds are summed and the per-fold accuracies are averaged before being
    printed.

    NOTE: ``[!<f>]`` is a single-character glob set, so training and testing
    files only form a clean partition for single-digit fold numbers
    (``k <= 10``).

    Args:
        k (int): Number of folds
        positive_training_size: The training size for the positive reviews in the fold method
        negative_training_size: The training size for the negative reviews in the fold method
        prior_positive_fold_probability: The probability of the positive reviews over both reviews for fold method
        prior_negative_fold_probability: The probability of the negative reviews over both reviews for fold method

    Returns:
        None: The summed confusion matrix and the mean accuracy are printed, not returned.

    Raises:
        ZeroDivisionError: If a fold classifies no reviews at all, or if ``k`` is 0.
    """
    positive_prefix = 'review_polarity/txt_sentoken/pos/cv'
    negative_prefix = 'review_polarity/txt_sentoken/neg/cv'

    total_tp = []
    total_fp = []
    total_tn = []
    total_fn = []
    total_accuracy = []

    for f in range(0, k):
        fold = str(f)
        confusion_matrix = Confusion()

        # Positive reviews: train on everything outside the fold, test on the fold.
        new_positive_training_list = sentimentList(
            glob.glob(positive_prefix + '[!' + fold + ']*.txt'))
        positive_probability_training_list = sentimentProbabilityList(
            sentimentSumList(new_positive_training_list), positive_training_size)
        new_positive_testing_list = sentimentList(
            glob.glob(positive_prefix + fold + '*.txt'))

        # Negative reviews: same split.
        new_negative_training_list = sentimentList(
            glob.glob(negative_prefix + '[!' + fold + ']*.txt'))
        negative_probability_training_list = sentimentProbabilityList(
            sentimentSumList(new_negative_training_list), negative_training_size)
        new_negative_testing_list = sentimentList(
            glob.glob(negative_prefix + fold + '*.txt'))

        # A truthy result from naiveBayesClassifier is treated as "predicted
        # positive". A positive review predicted negative is therefore a
        # false NEGATIVE (the original code counted it as a false positive).
        for positive_vector in new_positive_testing_list:
            polarity = naiveBayesClassifier(positive_vector, prior_positive_fold_probability,
                                            prior_negative_fold_probability, positive_probability_training_list, negative_probability_training_list)
            if polarity:
                confusion_matrix.incrementTP()
            else:
                confusion_matrix.incrementFN()

        # A negative review predicted positive is a false POSITIVE.
        for negative_vector in new_negative_testing_list:
            polarity = naiveBayesClassifier(negative_vector, prior_positive_fold_probability,
                                            prior_negative_fold_probability, positive_probability_training_list, negative_probability_training_list)
            if polarity:
                confusion_matrix.incrementFP()
            else:
                confusion_matrix.incrementTN()

        tp = confusion_matrix.getTP()
        fp = confusion_matrix.getFP()
        tn = confusion_matrix.getTN()
        fn = confusion_matrix.getFN()
        total_tp.append(tp)
        total_fp.append(fp)
        total_tn.append(tn)
        total_fn.append(fn)
        # float() keeps this a true division on Python 2 as well.
        total_accuracy.append((tp + tn) / float(tp + fp + tn + fn))

    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("True Positive: " + str(sum(total_tp)))
    print("False Positive: " + str(sum(total_fp)))
    print("True Negative: " + str(sum(total_tn)))
    print("False Negative: " + str(sum(total_fn)))
    print("10-Fold Cross Validation Method")
    print("Accuracy: " + str(sum(total_accuracy) / k))
def completeData(positive_review_count, negative_review_count, new_positive_list, new_negative_list, prior_positive_probability, prior_negative_probability):
    """Train and test on the complete data set and print the results.

    The per-class probability lists are built from all reviews, printed, and
    then used to classify those same reviews. The resulting confusion
    matrix and accuracy are printed.

    Args:
        positive_review_count: Total number of positive reviews
        negative_review_count: Total number of negative reviews
        new_positive_list: Binary list of lists for the positive reviews
        new_negative_list: Binary list of lists for the negative reviews
        prior_positive_probability: The probability of the positive reviews over both reviews
        prior_negative_probability: The probability of the negative reviews over both reviews

    Returns:
        None: The probability lists, confusion matrix and accuracy are printed, not returned.

    Raises:
        ZeroDivisionError: If both review lists are empty, so nothing is classified.
    """
    negative_probability_list = sentimentProbabilityList(
        sentimentSumList(new_negative_list), negative_review_count)
    positive_probability_list = sentimentProbabilityList(
        sentimentSumList(new_positive_list), positive_review_count)
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print(positive_probability_list)
    print(negative_probability_list)

    confusion_matrix = Confusion()

    # A truthy result from naiveBayesClassifier is treated as "predicted
    # positive". A positive review predicted negative is therefore a false
    # NEGATIVE (the original code counted it as a false positive).
    for positive_vector in new_positive_list:
        polarity = naiveBayesClassifier(positive_vector, prior_positive_probability,
                                        prior_negative_probability, positive_probability_list, negative_probability_list)
        if polarity:
            confusion_matrix.incrementTP()
        else:
            confusion_matrix.incrementFN()

    # A negative review predicted positive is a false POSITIVE.
    for negative_vector in new_negative_list:
        polarity = naiveBayesClassifier(negative_vector, prior_positive_probability,
                                        prior_negative_probability, positive_probability_list, negative_probability_list)
        if polarity:
            confusion_matrix.incrementFP()
        else:
            confusion_matrix.incrementTN()

    tp = confusion_matrix.getTP()
    fp = confusion_matrix.getFP()
    tn = confusion_matrix.getTN()
    fn = confusion_matrix.getFN()
    # float() keeps this a true division on Python 2 as well.
    accuracy = (tp + tn) / float(tp + fp + tn + fn)

    print("True Positive: " + str(tp))
    print("False Positive: " + str(fp))
    print("True Negative: " + str(tn))
    print("False Negative: " + str(fn))
    print("Complete Data Method")
    print("Accuracy: " + str(accuracy))
Example #4
0
    # Split the synthetic samples into train and test parts: columns 'x1' and
    # 'x2' are the features, column 'y' is the label. The label frame is also
    # passed as `stratify` (presumably sklearn's train_test_split, which would
    # keep the class proportions equal in both parts; confirm the import).
    X_train, X_test, y_train, y_test = train_test_split(
        synthetic[['x1', 'x2']],
        synthetic[['y']],
        test_size=test_size,
        stratify=synthetic[['y']])

    # scatter_plot(X_train, y_train, 'Train set')
    # scatter_plot(X_test, y_test, 'Test set')

    # Gaussian variant: fit on the training part, evaluate on the test part,
    # then build and print a Confusion for classes 'A1' and 'A2'.
    print('GaussianBayes:')
    bc = GaussianBayesClassifier()
    bc.fit(X_train, y_train)
    pred = bc.evaluate(X_test)
    real = list(y_test['y'])
    # NOTE(review): the {'A1': 50, 'A2': 50} argument looks like per-class
    # sample counts; verify against Confusion.from_wrong_preds.
    confusion = Confusion.from_wrong_preds(['A1', 'A2'], pred, real, {
        'A1': 50,
        'A2': 50
    })
    print(confusion)

    # Parzen variant: the same fit / evaluate / report sequence as above.
    print('ParzenBayes:')
    pc = ParzenBayesClassifier()
    pc.fit(X_train, y_train)
    pred = pc.evaluate(X_test)
    real = list(y_test['y'])
    confusion = Confusion.from_wrong_preds(['A1', 'A2'], pred, real, {
        'A1': 50,
        'A2': 50
    })
    print(confusion)

    #####################################################################################