def solveAssignment(training_file, test_file, k):
    """Build a k-member ensemble from the training file and write test predictions.

    Both files are read with ``nb.readFile``. An ensemble is built with
    ``formEnsembleClassifiers`` and applied to the test rows with
    ``ensembleClassify``. One ``[ref_id, predicted_class]`` row per test
    prediction is written to ``output.csv`` in the current working directory,
    overwriting any existing file.

    Args:
        training_file: Passed unchanged to ``nb.readFile``; source of the
            training labels and rows.
        test_file: Passed unchanged to ``nb.readFile``; source of the
            reference ids and rows to classify.
        k: Passed unchanged to ``formEnsembleClassifiers`` (presumably the
            number of classifiers to build -- confirm against that function).

    Returns:
        None. The only result is the ``output.csv`` file.

    Raises:
        IndexError: If there are more predictions than reference ids.
    """
    # Local import: only needed to choose the file mode for the CSV output.
    import sys

    # Read both files as they are. The first element of each result is not
    # used here. For the test file, the second element is used as the list of
    # reference ids that label the output rows.
    _, training_class, training_data = nb.readFile(training_file)
    _, ref_ids, test_data = nb.readFile(test_file)

    # Attribute statistics are computed over both data sets together; only
    # the second returned value is needed to build the ensemble.
    _, max_attribute_values = nb.findMaxNumAttributes(training_data, test_data)

    # Build the ensemble and the per-classifier errors used when classifying.
    kClassifiers, kClassifiers_errors = formEnsembleClassifiers(
        training_class, training_data, max_attribute_values, k)

    # Classify the training data as well. The result is not used in this
    # function; the call is kept because ensembleClassify receives the true
    # labels here and may report on them or have other side effects.
    # TODO(review): drop this call if ensembleClassify is side-effect free.
    ensembleClassify(training_data, training_class,
                     kClassifiers, kClassifiers_errors)

    # No true labels are available for the test data, hence the empty list.
    boosted_predicted_test_class = ensembleClassify(
        test_data, [], kClassifiers, kClassifiers_errors)

    # Indexing ref_ids (rather than zipping) keeps a length mismatch loud.
    output = [[ref_ids[i], predicted]
              for i, predicted in enumerate(boosted_predicted_test_class)]

    # csv.writer emits text on Python 3, so a binary-mode file raises
    # TypeError there; the csv docs call for text mode with newline="".
    # Python 2's csv module expects binary mode, so "wb" is kept for it.
    if sys.version_info[0] >= 3:
        out_file = open("output.csv", "w", newline="")
    else:
        out_file = open("output.csv", "wb")
    with out_file:
        csv.writer(out_file).writerows(output)