Example #1
def main(args):
    # Sweep Laplace smoothing parameters in [0, 0.2] and keep the best one.
    laplace_number_set = np.linspace(0, 0.2, 2001)
    laplace_performance = np.zeros(len(laplace_number_set))
    # Load the dataset once; it does not change between smoothing values.
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir, args.stemming)
    for number in range(len(laplace_number_set)):
        if number == 0:
            # Skip laplace = 0: unsmoothed counts can produce zero probabilities.
            laplace_performance[number] = 0
        else:
            predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set,
                                             laplace_number_set[number])
            accuracy, f1, precision, recall = compute_accuracies(
                predicted_labels, dev_set, dev_labels)
            laplace_performance[number] = accuracy

    # np.argmax returns an index; map it back to the parameter value.
    best_laplace = laplace_number_set[np.argmax(laplace_performance)]
    predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set,
                                     best_laplace)
    accuracy, f1, precision, recall = compute_accuracies(
        predicted_labels, dev_set, dev_labels)
    print("best laplace parameter:", best_laplace)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)
    print("Precision:", precision)
    print("Recall:", recall)
Example #2
def test_unigram_dev_stem_false_lower_false():
    print("Running unigram test..."+'\n')
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
    	"data/spam_data/train",
    	"data/spam_data/dev",
        stemming=False,
        lower_case=False,
        use_tqdm=False
    )
    predicted_labels = nb.naiveBayes(
        train_set, train_labels, dev_set, smoothing_parameter=1.0, pos_prior=0.5)

    if len(predicted_labels) != len(dev_labels):
        print("The number of predictions does not match the number of development labels.")
        errorDict = {
            'name': 'Unigram test on dev set without stemming and without lowercase',
            'score': 0,
            'max_score': 20,
            'visibility': 'visible'
        }
        return json.dumps(errorDict, indent=1)
    (
        accuracy,
        f1,
        precision,
        recall,
    ) = mp2.compute_accuracies(predicted_labels, dev_set, dev_labels)
    print("Accuracy:",accuracy)
    print("F1-Score:",f1)
    print("Precision:",precision)
    print("Recall:",recall)

    total_score = 0
    # Award 5 points for each accuracy threshold reached.
    for threshold in (0.81, 0.86, 0.91, 0.95):
        if accuracy >= threshold:
            total_score += 5
            print("+ 5 points for accuracy at or above " + str(threshold))
        else:
            print("Accuracy needs to be at least " + str(threshold))
    resultDict = {
        'name': 'Unigram test on dev set without stemming and without lowercase',
        'score': total_score,
        'max_score': 20,
        'visibility': 'visible'
    }
    return json.dumps(resultDict, indent=1)
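
When run as a script, a grader test like this would typically just print its JSON result, e.g.:

if __name__ == "__main__":
    # Execute the unigram test and show the score dictionary.
    print(test_unigram_dev_stem_false_lower_false())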
Example #3
def main(args):
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir, args.stemming)
    predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set, args.laplace)
    accuracy, f1, precision, recall = compute_accuracies(predicted_labels, dev_set, dev_labels)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)
    print("Precision:", precision)
    print("Recall:", recall)
Example #4
def main(args):
    # Modify stemming and lower_case below. Note that our test cases may use
    # both settings of the two parameters.
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir, stemming=False, lower_case=False)

    predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set)
    accuracy, false_positive, false_negative, true_positive, true_negative = compute_accuracies(
        predicted_labels, dev_labels)
    print("Accuracy:", accuracy)
    print("False Positive:", false_positive)
    print("False Negative:", false_negative)
    print("True Positive:", true_positive)
    print("True Negative:", true_negative)
Example #5
    def print_banner(self, title):
        # Consolidates the repeated separator prints around each algorithm name.
        print("\n----------------------------------")
        print("\n " + title + " \n")
        print("----------------------------------\n")

    def all_algo_model(self):
        # Run each classifier in turn; Decision Tree and KNN are currently disabled.
        self.print_banner("Decision Tree")
        # decisionTree().decision_tree_algo()

        self.print_banner("Gradient Descent")
        gradientDescent().gradient_descent_algo()

        self.print_banner("K-Nearest Neighbour")
        # KNNAlgo().KNN__model_algo()

        self.print_banner("Linear Regression")
        linearRegression().linear_regression_algo()

        self.print_banner("Naive Bayes")
        naiveBayes().naive_bayes_algo()

        self.print_banner("Support Vector Machine")
        svmModule().svm_model_algo()

        print("\n---------- End ---------------------\n")
Example #6
def main():
    # List of patient objects
    patient_list = parse_csv()

    # create the ten folds
    ten_folds_strat_list = stratify_data(patient_list)

    # create the classifier objects
    knn = kNN()
    naive_bayes = naiveBayes()

    # call the 10-fold cross validation
    ten_fold_strat_cross_validation(knn, ten_folds_strat_list, 10)
    ten_fold_strat_cross_validation(naive_bayes, ten_folds_strat_list, 10)
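
stratify_data is presumably responsible for splitting the patients into ten folds with roughly equal class proportions. A sketch of that idea, assuming each patient object exposes a .label attribute (the real attribute name is not shown in the example):

from collections import defaultdict

def stratify_data(patient_list, num_folds=10):
    # Group patients by class, then deal them round-robin into folds so
    # every fold keeps roughly the overall class proportions.
    by_class = defaultdict(list)
    for patient in patient_list:
        by_class[patient.label].append(patient)  # .label is an assumed attribute
    folds = [[] for _ in range(num_folds)]
    i = 0
    for patients in by_class.values():
        for patient in patients:
            folds[i % num_folds].append(patient)
            i += 1
    return folds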
Example #7
def main(args):
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir, args.development_dir, args.stemming)
    predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set,
                                     args.laplace)
    # Write one "id,LABEL" line per test example; label 0 maps to SARCASM.
    # Context managers ensure both files are flushed and closed.
    with open('./data/test.jsonl', 'r') as f:
        test = f.readlines()
    with open('answer.txt', 'w') as answer:
        for i in range(len(test)):
            answer.write(json.loads(test[i])['id'])
            answer.write(",")
            if predicted_labels[i] == 0:
                answer.write("SARCASM")
            else:
                answer.write("NOT_SARCASM")
            answer.write("\n")
Example #8
# dataTrain, dataTest, and the Naive Bayes module nb are assumed to be
# defined earlier in the script.
import collections
import csv
import operator

import numpy as np

num_bootstrap = 10  # number of bootstrap samples (value assumed; not defined in the original)
len_bootstrap = 100

# create bootstraps by sampling the training data with replacement
list_bootstrap = {}
for i in range(num_bootstrap):
    bootstrap = []
    for j in range(len_bootstrap):
        randomData = dataTrain[np.random.randint(0, len(dataTrain))]
        bootstrap.append(randomData)
    list_bootstrap[i] = np.copy(bootstrap)

# predict the class of each test point with a model trained on each bootstrap
# (use a separate name so the original dataTrain is not clobbered)
model_result = {}
for i in range(num_bootstrap):
    bootstrap_train = list_bootstrap[i]
    model_result[i] = nb.naiveBayes(bootstrap_train, dataTest, class_index=2)

# majority vote across all models for the final result
final_result = []
for i in range(len(dataTest)):
    temp = [model_result[model_idx][i] for model_idx in model_result]
    temp = collections.Counter(temp)
    result = max(temp.items(), key=operator.itemgetter(1))[0]
    final_result.append(result)
print(final_result)

# save the result in a csv file, one label per row
with open('Result.csv', 'w', newline='') as data_file:
    writer = csv.writer(data_file)
    for row in final_result:
        writer.writerow([row])
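
The voting step can be written more compactly with Counter.most_common, which drops the operator import; a small equivalent helper:

import collections

def majority_vote(predictions):
    # Return the label chosen by the most bootstrap models.
    return collections.Counter(predictions).most_common(1)[0][0]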
Example #9
import scipy.io as io
import preprocessor as pre
from naive_bayes import naiveBayes
from logit import logisitcRegression

Numpyfile = io.loadmat('mnist_data.mat')
trX = Numpyfile['trX']
trY = Numpyfile['trY']
tsX = Numpyfile['tsX']
tsY = Numpyfile['tsY']

# extract features (average value of pixels, standard deviation of pixels)
trxFeatures = pre.extractFeatures(trX)
tsxFeatures = pre.extractFeatures(tsX)

mean_7, mean_8, sd_7, sd_8, accuracy_bayesian_7, accuracy_bayesian_8, accuracy_bayesian_total = naiveBayes(
    trxFeatures, trY[0], tsxFeatures, tsY[0])
w, accuracy_logit_7, accuracy_logit_8, accuracy_logit_total = logisitcRegression(
    trxFeatures, trY[0], tsxFeatures, tsY[0])

print("Accuracy for Naive Bayes:")
print("For Digit 7: ",accuracy_bayesian_7*100)
print("For Digit 8: ",accuracy_bayesian_8*100)
print("Overall: ",accuracy_bayesian_total*100)
print("\n")
print("Accuracy for Logistic Regression: ")
print("For Digit 7: ",accuracy_logit_7*100)
print("For Digit 8: ",accuracy_logit_8*100)
print("Overall: ",accuracy_logit_total*100)