def main(dataset_name): num_class, num_feature, x_train, y_train, x_test, y_test = readdata.read_dataset(dataset_name) print 'Number of folds:' nfold = int(input()) print 'Preparing cv dataset...' cv_dataset = prepare_cv_dataset(x_train, y_train, nfold) k = num_feature / 2 bestk = k bestdelta0 = 0 highest_prec = 0 """ while 1: print 'Input delta:' s = raw_input().strip() if s == '': break delta0 = float(s) avg_precision = cross_validation(cv_dataset, num_class, k, delta0) print 'cross valiation: k=%d, delta=%f, avg precision=%f\n' % (k, delta0, avg_precision) if avg_precision > highest_prec: highest_prec = avg_precision bestk = k bestdelta0 = delta0 print 'Best k and delta0: ', bestk, bestdelta0 print 'Best avg precision: ', highest_prec k = bestk delta0 = bestdelta0 """ prior, mean, eigenvalue, eigenvector, delta = build_MQDF_model(num_class, x_train, y_train, k, 0) y_pred = MQDF_predict(x_test, num_class, k, mean, eigenvalue, eigenvector, delta) #pdb.set_trace() print sklearn.metrics.classification_report(y_test, y_pred) print 'Average accuracy: ', sklearn.metrics.accuracy_score(y_test, y_pred)
def main(dataset_name): num_class, num_feature, x_train, y_train, x_test, y_test = \ readdata.read_dataset(dataset_name) #prepare_cv_dataset(x_train, y_train, 3) print 'Number of folds:' nfold = int(input()) print 'Preparing cv dataset...' cv_dataset = prepare_cv_dataset(x_train, y_train, nfold) best = [0, 0, 0] # beta, gamma, highest precision while 1: print 'Input beta, gamma:' s = raw_input().strip() if s == '': break beta, gamma = s.split() beta = float(beta) gamma = float(gamma) avg_precision = cross_validation(cv_dataset, num_class, beta, gamma) print 'cross valiation: beta=%f, gamma=%f, avg precision=%f' % (beta, gamma, avg_precision) if avg_precision > best[2]: best[2] = avg_precision best[0] = beta best[1] = gamma print 'Best beta and gamma: ', best[0], best[1] print 'Best avg precision: ', best[2] beta = best[0] gamma = best[1] prior, mean, cov_matrix = build_RDA_model(num_class, x_train, y_train, beta, gamma) # predict like QDF y_pred = QDF_predict(x_test, num_class, prior, mean, cov_matrix) print sklearn.metrics.classification_report(y_test, y_pred) print 'Average accuracy: ', sklearn.metrics.accuracy_score(y_test, y_pred)
score /= nfold return score if __name__ == '__main__': import sys dataset_name = sys.argv[1] num_class, num_feature, x_train, y_train, x_test, y_test = readdata.read_dataset(dataset_name) x_train, y_train = make_small_dataset(x_train, y_train, 500) x_test, y_test = make_small_dataset(x_test, y_test, 200) print 'Number of folds:' nfold = int(input()) print 'Preparing cv dataset...' cv_dataset = prepare_cv_dataset(x_train, y_train, nfold) bestk = 0 highest_prec = 0 while 1: print 'Input number of nearest neighbor:' s = raw_input().strip() if s == '': break k = int(s) avg_precision = cross_validation(cv_dataset, num_class, k) print 'cross valiation: k=%d, avg precision=%f' % (k, avg_precision) if avg_precision > highest_prec: highest_prec = avg_precision