コード例 #1
0
def solveAssignment(training_file, test_file, k):
    """Train an ensemble on ``training_file`` and write test predictions to ``output.csv``.

    The training and test files are parsed with ``nb.readFile``, an ensemble
    is built with ``formEnsembleClassifiers`` (passed ``k``), and the test
    rows are classified with ``ensembleClassify``.  One ``[ref_id, label]``
    row per test prediction is written to ``output.csv`` in the current
    working directory.

    Args:
        training_file: Passed unchanged to ``nb.readFile``; its second and
            third results are used as the training labels and training rows.
        test_file: Passed unchanged to ``nb.readFile``; its second result is
            used as the per-row reference ids, its third as the test rows.
        k: Forwarded to ``formEnsembleClassifiers`` (presumably the number of
            boosted classifiers to build -- confirm against that function).

    Returns:
        None.  The only output is the ``output.csv`` file.

    Raises:
        IndexError: If there are more predictions than reference ids.
    """
    # Local import: only needed to choose the csv file mode at the end.
    import sys

    # The first value returned by nb.readFile is not used by this function.
    _, training_class, training_data = nb.readFile(training_file)
    _, ref_ids, test_data = nb.readFile(test_file)

    # Computed over both files together; only the second result is consumed
    # by the ensemble builder.
    _, max_attribute_values = nb.findMaxNumAttributes(training_data, test_data)

    kClassifiers, kClassifiers_errors = formEnsembleClassifiers(
        training_class, training_data, max_attribute_values, k)

    # The training-set predictions are not used here.  The call is retained
    # because ensembleClassify may have side effects when given labels
    # (NOTE(review): confirm, and drop this call if it has none).
    ensembleClassify(training_data, training_class, kClassifiers, kClassifiers_errors)
    boosted_predicted_test_class = ensembleClassify(
        test_data, [], kClassifiers, kClassifiers_errors)

    # Index into ref_ids (rather than zip) so that a shortage of reference
    # ids still fails loudly instead of silently truncating the output.
    output = [[ref_ids[i], label]
              for i, label in enumerate(boosted_predicted_test_class)]

    # csv.writer needs a text-mode file opened with newline="" on Python 3,
    # but a binary-mode file on Python 2; "wb" on Python 3 raises TypeError.
    if sys.version_info[0] >= 3:
        f = open("output.csv", "w", newline="")
    else:
        f = open("output.csv", "wb")
    with f:
        writer = csv.writer(f)
        writer.writerows(output)