def getAccuracyForInclusiveSubset(output_dir, algo_key, data, attributes, subset):
    """ Return accuracy for algorithm with name <algo_key> on <data> and
        attributes <attributes> for the attribute subset <subset>.

        A temporary ARFF training file (name built by makeFileName from
        <output_dir> and <algo_key>) is written with
        writeArffForInclusiveSubset, evaluated with WC.getAccuracyAlgoKey and
        then removed. The file is removed even if the evaluation raises.

        <attributes> must have at least 2 entries (enforced by an assert).
        Note: class_index is not a parameter of this function; it is read
        from the enclosing (presumably module-level) scope.

        Returns whatever WC.getAccuracyAlgoKey returns for the training file.
    """
    assert len(attributes) >= 2
    training_filename = makeFileName(output_dir, 'find-best-attr', algo_key, None, 'arff')
    writeArffForInclusiveSubset(training_filename, data, attributes, subset)
    try:
        return WC.getAccuracyAlgoKey(algo_key, class_index, training_filename)
    finally:
        # Clean up the temporary file on both the success and the error path
        # so that a failed evaluation does not leave it behind.
        misc.rm(training_filename)
def getAccuracyForSplit(base_data, split_vector):
    """ Split <base_data> into training and test data sets.
        Rows with indexes in <split_vector> go into training file
        and remaining go into test file.
        Run prediction and return accuracy.

        The two files are created by makeTrainingTestSplit (called with the
        prefix "temp") and are removed again before returning, including when
        getAccuracy raises.

        Returns whatever getAccuracy returns for the two files.
    """
    training_filename, test_filename = makeTrainingTestSplit(base_data, split_vector, "temp")
    try:
        return getAccuracy(training_filename, test_filename)
    finally:
        # Remove both temporary files whether or not the prediction succeeded.
        misc.rm(training_filename)
        misc.rm(test_filename)
def getSubsetResultDictList(algo_key, data, attributes, exclusive_subset_list):
    """ Returns a list of dicts that shows results of running the <algo_key>
        classifier on <data> for (complement of x) of <attributes> for x in
        <exclusive_subset_list>.

        Each entry of <exclusive_subset_list> is converted with
        getInclusiveSubset, written to its own temporary ARFF file, and all
        files are evaluated in one call to WC.getAccuracyAlgoKeyList. The
        temporary files are removed afterwards, even if the evaluation raises.

        <exclusive_subset_list> must not contain None (enforced by an assert).

        Each returned dict has the keys:
            'subset': the entry of <exclusive_subset_list>
            'score':  element [0] of the corresponding evaluation result
            'eval':   element [1] of the corresponding evaluation result

        Note: output_dir, class_index, is_inclusive, verbose and show_scores
        are not parameters; they are read from the enclosing (presumably
        module-level) scope.
    """
    assert None not in exclusive_subset_list

    inclusive_subset_list = [getInclusiveSubset(attributes, subset, is_inclusive)
                             for subset in exclusive_subset_list]
    # One uniquely numbered temporary file per subset.
    training_filename_list = [makeFileName(output_dir, 'find-best-attr%02d' % i, algo_key, None, 'arff')
                              for i in range(len(exclusive_subset_list))]

    for training_filename, inclusive_subset in zip(training_filename_list, inclusive_subset_list):
        writeArffForInclusiveSubset(training_filename, data, attributes, inclusive_subset)

    try:
        score_eval_list = WC.getAccuracyAlgoKeyList(algo_key, class_index, training_filename_list)
    finally:
        # Remove the temporary files on both the success and the error path.
        for training_filename in training_filename_list:
            misc.rm(training_filename)

    # Index into score_eval_list (rather than zip) so that a result list that
    # is shorter than expected still fails loudly with an IndexError.
    result_list = [{'subset': subset,
                    'score': score_eval_list[i][0],
                    'eval': score_eval_list[i][1]}
                   for i, subset in enumerate(exclusive_subset_list)]

    if verbose or show_scores:
        for result in result_list:
            # A single pre-formatted string prints identically whether print
            # is a statement (Python 2) or a function (Python 3).
            print('getSubsetResultDict => %s %s' % (result['score'], result['subset']))

    return result_list