Example no. 1
import pandas
from sklearn import model_selection
from sklearn.metrics import classification_report, roc_auc_score

import models
import utils

# `args` is assumed to be a module-level argparse namespace parsed elsewhere.


def main():
  df = pandas.read_csv(args.input_filename, index_col=False, header=0)
  data = df.values
  column_names = df.columns.values.tolist()
  feature_names = column_names[0:args.label_column]
  label_name = column_names[args.label_column]

  # Extract features/labels and their names from raw data
  features = data[:, 0:args.label_column]
  labels = data[:, args.label_column].astype(int)
  orig_train_features, orig_test_features, train_labels, test_labels = (
      model_selection.train_test_split(features, labels, test_size=args.test_size))

  (model, train_features, train_labels, test_features) = models.train_knn(
      orig_train_features, train_labels, orig_test_features,
      args.imbalanced_data, args.train_size,
      args.scaling_method, args.minmax_min, args.minmax_max,
      args.skip_feature_selection, args.skip_grid_search,
      args.num_neighbors, args.weights, args.algorithm, args.metric,
      args.num_jobs)
  # Report accuracy
  y_true, y_pred = test_labels, model.predict(test_features)
  predicted_probabilities = model.predict_proba(test_features)
  print("Test Accuracy: %0.2f%%" % (model.score(test_features, test_labels)*100.))
  print('AUC score: %0.5f' % roc_auc_score(y_true, predicted_probabilities[:,1]))
  
  # Full classification report.
  print("\n*****************************\n")
  class_labels = [0, 1]
  target_names = ["female", "male"]
  print(classification_report(y_true, y_pred, labels=class_labels,
                              target_names=target_names))

  # Now evaluate on the test data at different probability thresholds: report
  # accuracy only for points whose predicted probability for either label is
  # above the specified threshold.
  utils.print_threshold_metrics(predicted_probabilities, y_true, class_labels)
  
  # Write test features, with a final bit indicating whether each prediction
  # was correct.
  if args.output_filename_prefix:
    utils.write_data_predictions(args.output_filename_prefix, orig_test_features,
                                 feature_names, y_true, y_pred)
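The routine utils.print_threshold_metrics called above is not included in this example. As a minimal sketch of the idea described in the comment (report accuracy only on points whose top class probability clears a threshold), something like the following would work; the threshold grid and output format are assumptions, not the project's actual implementation:

import numpy as np


def print_threshold_metrics(predicted_probabilities, y_true, class_labels,
                            thresholds=(0.5, 0.6, 0.7, 0.8, 0.9)):
  # For each threshold, keep only the points whose highest class probability
  # reaches it, then report coverage and accuracy on that subset.
  y_true = np.asarray(y_true)
  top_probabilities = predicted_probabilities.max(axis=1)
  predictions = np.asarray(class_labels)[predicted_probabilities.argmax(axis=1)]
  for threshold in thresholds:
    mask = top_probabilities >= threshold
    if not mask.any():
      print("threshold %.2f: no points retained" % threshold)
      continue
    coverage = mask.mean() * 100.
    accuracy = (predictions[mask] == y_true[mask]).mean() * 100.
    print("threshold %.2f: coverage %5.1f%%, accuracy %5.2f%%" %
          (threshold, coverage, accuracy))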
Example no. 2
import random
import sys
from collections import defaultdict

from sklearn import model_selection
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

import models
import utils

# `args` is assumed to be a module-level argparse namespace parsed elsewhere.


def compute_trial_metrics(data):
  features, labels, train_sizes, random_seed = data

  random.seed(random_seed)
  train_features, test_features, train_labels, test_labels = (
      model_selection.train_test_split(features, labels, test_size=args.test_size,
                                       random_state=random.randint(1, 99999999)))

  # Mapping from train size to metric name ("accuracy", "precision", ...) to a value.
  trial_metrics = defaultdict(dict)
  for train_size in train_sizes:
    if args.learning_algorithm == 'random-forest':
      (model, transformed_train_features, transformed_train_labels,
       transformed_test_features) = models.train_random_forest(
           train_features, train_labels, test_features,
           args.scikit_balancing, train_size,
           args.skip_feature_selection, args.skip_grid_search,
           args.rf_max_features, args.rf_num_trees, args.rf_criterion,
           args.rf_min_samples_split, args.rf_min_samples_leaf, 1)
    elif args.learning_algorithm == 'svm':
      (model, transformed_train_features, transformed_train_labels,
       transformed_test_features) = models.train_svm(
           train_features, train_labels, test_features,
           args.scikit_balancing, train_size,
           'minmax', 0, 1,
           args.skip_feature_selection, args.skip_grid_search,
           args.svm_kernel, args.svm_gamma, args.svm_cost, args.svm_degree, 1)
    elif args.learning_algorithm == 'logistic':
      (model, transformed_train_features, transformed_train_labels,
       transformed_test_features) = models.train_logistic(
           train_features, train_labels, test_features,
           args.scikit_balancing, train_size,
           args.skip_feature_selection, args.skip_grid_search,
           args.logistic_penalty, args.logistic_cost, args.logistic_dual,
           args.logistic_tolerance, 1)
    elif args.learning_algorithm == 'knn':
      (model, transformed_train_features, transformed_train_labels,
       transformed_test_features) = models.train_knn(
           train_features, train_labels, test_features,
           args.scikit_balancing, train_size,
           'minmax', 0, 1,
           args.skip_feature_selection, args.skip_grid_search,
           args.knn_num_neighbors, args.knn_weights, args.knn_algorithm,
           args.knn_metric, 1)
    else:
      sys.exit('Unsupported learning algorithm: ' + args.learning_algorithm)

    y_true, y_pred = test_labels, model.predict(transformed_test_features)

    # Class sizes in the test set.
    test_size = transformed_test_features.shape[0]
    test_female_size = sum(test_labels == utils.FEMALE)
    test_male_size = sum(test_labels == utils.MALE)
    
    # Compute evaluation metrics
    test_accuracy = model.score(transformed_test_features, test_labels)*100.
    (precisions, recalls, fscores, supports) = precision_recall_fscore_support(
        y_true=y_true, y_pred=y_pred, labels=[0, 1])
    # Confusion-matrix counts: rows are true labels, columns are predictions.
    confusion = confusion_matrix(y_true, y_pred)
    test_true_female = confusion[utils.FEMALE][utils.FEMALE]
    test_false_female = confusion[utils.MALE][utils.FEMALE]
    test_true_male = confusion[utils.MALE][utils.MALE]
    test_false_male = confusion[utils.FEMALE][utils.MALE]

    trial_metrics[train_size]["train_size"] = train_size
    trial_metrics[train_size]["test_size"] = test_size
    trial_metrics[train_size]["test_female_size"] = test_female_size
    trial_metrics[train_size]["test_male_size"] = test_male_size
    trial_metrics[train_size]["test_true_female"] = test_true_female
    trial_metrics[train_size]["test_false_female"] = test_false_female
    trial_metrics[train_size]["test_true_male"] = test_true_male
    trial_metrics[train_size]["test_false_male"] = test_false_male
    trial_metrics[train_size]["test_accuracy"] = test_accuracy
    trial_metrics[train_size]["test_female_precision"] = precisions[utils.FEMALE]
    trial_metrics[train_size]["test_male_precision"] = precisions[utils.MALE]
    trial_metrics[train_size]["test_female_recall"] = recalls[utils.FEMALE]
    trial_metrics[train_size]["test_male_recall"] = recalls[utils.MALE]
  
  return trial_metrics
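The true/false female/male counts above rely on scikit-learn's confusion-matrix convention: rows index the true label and columns the predicted label, so confusion[t][p] counts samples of true class t predicted as class p. A quick self-contained check, assuming FEMALE = 0 and MALE = 1 as the labels=[0, 1] argument suggests:

from sklearn.metrics import confusion_matrix

FEMALE, MALE = 0, 1
y_true = [0, 0, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1]
confusion = confusion_matrix(y_true, y_pred)
print(confusion[FEMALE][FEMALE])  # 2: true females (0 predicted as 0)
print(confusion[MALE][FEMALE])    # 1: false female (1 predicted as 0)
print(confusion[FEMALE][MALE])    # 1: false male (0 predicted as 1)
print(confusion[MALE][MALE])      # 1: true male (1 predicted as 1)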
Example no. 3
import random
import sys
from collections import defaultdict

import numpy
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

import models
import utils

# `args` is assumed to be a module-level argparse namespace parsed elsewhere.


def compute_trial_metrics(data):
    (female_features, female_labels, male_features, male_labels,
     test_actual_ratios, random_seed) = data

    random.seed(random_seed)

    num_males = male_features.shape[0]
    num_females = female_features.shape[0]

    # Make sure to seed numpy's random state as well: each subprocess receives
    # a copy of the same state, so without reseeding they would all draw
    # identical random numbers.
    numpy.random.seed(random_seed)
    # Mapping from test ratio to metric name ("accuracy", "precision", ...) to a value.
    trial_metrics = defaultdict(dict)
    for test_actual_ratio in test_actual_ratios:
        # Construct a test set with the requested fraction of female samples.
        test_female_size = int(1.0 * args.test_size * test_actual_ratio)
        test_male_size = args.test_size - test_female_size

        if num_females < test_female_size:
            sys.exit('Need {} female samples for ratio {}, but only {} are '
                     'available.'.format(test_female_size, test_actual_ratio,
                                         num_females))
        if num_males < test_male_size:
            sys.exit('Need {} male samples for ratio {}, but only {} are '
                     'available.'.format(test_male_size, test_actual_ratio,
                                         num_males))

        test_female_indices = numpy.random.choice(num_females,
                                                  test_female_size,
                                                  replace=False)
        test_male_indices = numpy.random.choice(num_males,
                                                test_male_size,
                                                replace=False)
        female_mask = numpy.zeros(num_females, dtype=bool)
        female_mask[test_female_indices] = True
        male_mask = numpy.zeros(num_males, dtype=bool)
        male_mask[test_male_indices] = True

        test_female_features = female_features[female_mask, :]
        test_female_labels = female_labels[female_mask]
        test_male_features = male_features[male_mask, :]
        test_male_labels = male_labels[male_mask]

        train_female_features = female_features[~female_mask, :]
        train_female_labels = female_labels[~female_mask]
        train_male_features = male_features[~male_mask, :]
        train_male_labels = male_labels[~male_mask]

        test_features = numpy.concatenate(
            (test_female_features, test_male_features))
        test_labels = numpy.concatenate((test_female_labels, test_male_labels))

        train_features = numpy.concatenate(
            (train_female_features, train_male_features))
        train_labels = numpy.concatenate(
            (train_female_labels, train_male_labels))

        if args.learning_algorithm == 'random-forest':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_random_forest(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.rf_max_features, args.rf_num_trees, args.rf_criterion,
                 args.rf_min_samples_split, args.rf_min_samples_leaf, 1)
        elif args.learning_algorithm == 'svm':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_svm(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size, 'minmax', 0, 1,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.svm_kernel, args.svm_gamma, args.svm_cost,
                 args.svm_degree, 1)
        elif args.learning_algorithm == 'logistic':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_logistic(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.logistic_penalty, args.logistic_cost, args.logistic_dual,
                 args.logistic_tolerance, 1)
        elif args.learning_algorithm == 'knn':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_knn(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size, 'minmax', 0, 1,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.knn_num_neighbors, args.knn_weights, args.knn_algorithm,
                 args.knn_metric, 1)
        else:
            sys.exit('Unsupported learning algorithm: ' +
                     args.learning_algorithm)

        # Class sizes in the train/test sets.
        train_size = transformed_train_features.shape[0]
        train_female_size = sum(transformed_train_labels == utils.FEMALE)
        train_male_size = sum(transformed_train_labels == utils.MALE)
        test_size = transformed_test_features.shape[0]
        test_actual_female_size = sum(test_labels == utils.FEMALE)
        test_actual_male_size = sum(test_labels == utils.MALE)

        # Train performance.
        y_true = transformed_train_labels
        y_pred = model.predict(transformed_train_features)
        train_predicted_female_size = sum(y_pred == utils.FEMALE)
        train_predicted_male_size = sum(y_pred == utils.MALE)
        confusion = confusion_matrix(y_true, y_pred)
        train_true_female = confusion[utils.FEMALE][utils.FEMALE]
        train_false_female = confusion[utils.MALE][utils.FEMALE]
        train_true_male = confusion[utils.MALE][utils.MALE]
        train_false_male = confusion[utils.FEMALE][utils.MALE]
        train_accuracy = model.score(transformed_train_features,
                                     transformed_train_labels) * 100.
        (train_precisions, train_recalls, fscores,
         supports) = precision_recall_fscore_support(y_true=y_true,
                                                     y_pred=y_pred,
                                                     labels=[0, 1])

        # Test performance.
        y_true, y_pred = test_labels, model.predict(transformed_test_features)
        test_predicted_female_size = sum(y_pred == utils.FEMALE)
        test_predicted_male_size = sum(y_pred == utils.MALE)
        test_predicted_ratio = (1.0 * test_predicted_female_size) / test_size
        confusion = confusion_matrix(y_true, y_pred)
        test_true_female = confusion[utils.FEMALE][utils.FEMALE]
        test_false_female = confusion[utils.MALE][utils.FEMALE]
        test_true_male = confusion[utils.MALE][utils.MALE]
        test_false_male = confusion[utils.FEMALE][utils.MALE]
        test_accuracy = model.score(transformed_test_features,
                                    test_labels) * 100.
        (test_precisions, test_recalls, fscores,
         supports) = precision_recall_fscore_support(y_true=y_true,
                                                     y_pred=y_pred,
                                                     labels=[0, 1])

        trial_metrics[test_actual_ratio].update({
            "train_size": train_size,
            "train_female_size": train_female_size,
            "train_male_size": train_male_size,
            "train_predicted_female_size": train_predicted_female_size,
            "train_predicted_male_size": train_predicted_male_size,
            "train_true_female": train_true_female,
            "train_false_female": train_false_female,
            "train_true_male": train_true_male,
            "train_false_male": train_false_male,
            "train_accuracy": train_accuracy,
            "train_female_precision": train_precisions[utils.FEMALE],
            "train_male_precision": train_precisions[utils.MALE],
            "train_female_recall": train_recalls[utils.FEMALE],
            "train_male_recall": train_recalls[utils.MALE],
            "test_size": test_size,
            "test_actual_ratio": test_actual_ratio,
            "test_actual_female_size": test_actual_female_size,
            "test_actual_male_size": test_actual_male_size,
            "test_predicted_ratio": test_predicted_ratio,
            "test_predicted_female_size": test_predicted_female_size,
            "test_predicted_male_size": test_predicted_male_size,
            "test_true_female": test_true_female,
            "test_false_female": test_false_female,
            "test_true_male": test_true_male,
            "test_false_male": test_false_male,
            "test_accuracy": test_accuracy,
            "test_female_precision": test_precisions[utils.FEMALE],
            "test_male_precision": test_precisions[utils.MALE],
            "test_female_recall": test_recalls[utils.FEMALE],
            "test_male_recall": test_recalls[utils.MALE],
        })

    return trial_metrics
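The numpy.random.seed call above matters because compute_trial_metrics takes its inputs as a single tuple, which suggests it is dispatched to worker processes that would otherwise inherit identical random state. A minimal sketch of such a dispatch; the pool size and trial count here are assumptions:

import multiprocessing


def run_trials(female_features, female_labels, male_features, male_labels,
               test_actual_ratios, num_trials=20, num_processes=4):
    # Give every trial a distinct seed so the workers do not all draw the
    # same random numbers.
    trial_inputs = [(female_features, female_labels, male_features,
                     male_labels, test_actual_ratios, seed)
                    for seed in range(num_trials)]
    pool = multiprocessing.Pool(num_processes)
    try:
        return pool.map(compute_trial_metrics, trial_inputs)
    finally:
        pool.close()
        pool.join()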
Example no. 4
import random
import sys
from collections import defaultdict

from sklearn import model_selection
from sklearn.metrics import (confusion_matrix, precision_recall_fscore_support,
                             roc_auc_score)

import models
import utils

# `args` is assumed to be a module-level argparse namespace parsed elsewhere.


def compute_trial_metrics(df, filtering_thresholds, filtering_column,
                          random_seed):
    random.seed(random_seed)

    # Mapping from filtering threshold to metric name ("accuracy", "precision", ...) to a value.
    trial_metrics = defaultdict(dict)
    for filtering_threshold in filtering_thresholds:
        # First extract the rows that satisfy the requested threshold, then
        # split into train/test.
        filtered_df = df[df[filtering_column] >= filtering_threshold]
        percentage_data = (100.0 * len(filtered_df.index)) / len(df.index)
        data = filtered_df.values
        features = data[:, 0:args.label_column]
        labels = data[:, args.label_column].astype(int)
        train_features, test_features, train_labels, test_labels = (
            model_selection.train_test_split(features,
                                             labels,
                                             test_size=args.test_size,
                                             random_state=random.randint(
                                                 1, 99999999)))
        assert train_features.shape[0] >= args.train_size

        if args.learning_algorithm == 'random-forest':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_random_forest(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.rf_max_features, args.rf_num_trees, args.rf_criterion,
                 args.rf_min_samples_split, args.rf_min_samples_leaf,
                 args.num_processes)
        elif args.learning_algorithm == 'svm':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_svm(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size, 'minmax', 0, 1,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.svm_kernel, args.svm_gamma, args.svm_cost,
                 args.svm_degree, args.num_processes)
        elif args.learning_algorithm == 'logistic':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_logistic(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.logistic_penalty, args.logistic_cost, args.logistic_dual,
                 args.logistic_tolerance, args.num_processes)
        elif args.learning_algorithm == 'knn':
            (model, transformed_train_features, transformed_train_labels,
             transformed_test_features) = models.train_knn(
                 train_features, train_labels, test_features,
                 args.scikit_balancing, args.train_size, 'minmax', 0, 1,
                 args.skip_feature_selection, args.skip_grid_search,
                 args.knn_num_neighbors, args.knn_weights, args.knn_algorithm,
                 args.knn_metric, args.num_processes)
        else:
            sys.exit('Unsupported learning algorithm: ' +
                     args.learning_algorithm)

        y_true, y_pred = test_labels, model.predict(transformed_test_features)
        predicted_probabilities = model.predict_proba(
            transformed_test_features)

        # Class sizes in the train/test sets.
        train_size = transformed_train_features.shape[0]
        train_female_size = sum(transformed_train_labels == utils.FEMALE)
        train_male_size = sum(transformed_train_labels == utils.MALE)
        test_size = transformed_test_features.shape[0]
        test_female_size = sum(test_labels == utils.FEMALE)
        test_male_size = sum(test_labels == utils.MALE)

        # Compute evaluation metrics
        test_accuracy = model.score(transformed_test_features,
                                    test_labels) * 100.
        test_AUC = roc_auc_score(y_true, predicted_probabilities[:, 1])
        (precisions, recalls, f1scores,
         supports) = precision_recall_fscore_support(y_true=y_true,
                                                     y_pred=y_pred,
                                                     labels=[0, 1])
        (ave_precision, ave_recall, ave_f1score,
         ave_support) = precision_recall_fscore_support(y_true=y_true,
                                                        y_pred=y_pred,
                                                        labels=[0, 1],
                                                        average='macro')
        # Confusion-matrix counts: rows are true labels, columns are predictions.
        confusion = confusion_matrix(y_true, y_pred)
        test_true_female = confusion[utils.FEMALE][utils.FEMALE]
        test_false_female = confusion[utils.MALE][utils.FEMALE]
        test_true_male = confusion[utils.MALE][utils.MALE]
        test_false_male = confusion[utils.FEMALE][utils.MALE]

        trial_metrics[filtering_threshold].update({
            "min_active_days": filtering_threshold,
            "percentage_data": percentage_data,
            "train_size": train_size,
            "train_female_size": train_female_size,
            "train_male_size": train_male_size,
            "test_size": test_size,
            "test_female_size": test_female_size,
            "test_male_size": test_male_size,
            "test_true_female": test_true_female,
            "test_false_female": test_false_female,
            "test_true_male": test_true_male,
            "test_false_male": test_false_male,
            "test_accuracy": test_accuracy,
            "test_AUC": test_AUC,
            "test_average_precision": ave_precision,
            "test_female_precision": precisions[utils.FEMALE],
            "test_male_precision": precisions[utils.MALE],
            "test_average_recall": ave_recall,
            "test_female_recall": recalls[utils.FEMALE],
            "test_male_recall": recalls[utils.MALE],
            "test_average_f1score": ave_f1score,
            "test_female_f1score": f1scores[utils.FEMALE],
            "test_male_f1score": f1scores[utils.MALE],
        })

    return trial_metrics
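Each call returns a {threshold: {metric: value}} mapping; a hedged sketch of how results from several trials could be averaged per threshold and written out with pandas (the helper name and output format are assumptions, not part of the original script):

import pandas


def write_average_metrics(all_trial_metrics, output_filename):
    # Flatten each trial's per-threshold metric dicts into rows, then
    # average every metric across trials for each threshold.
    rows = [metrics
            for trial in all_trial_metrics
            for metrics in trial.values()]
    df = pandas.DataFrame(rows)
    averaged = df.groupby("min_active_days", as_index=False).mean()
    averaged.to_csv(output_filename, index=False)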
Example no. 5
import json
import time

import numpy as np
from sklearn.model_selection import train_test_split

import utils

# split_train_test, preprocess_data, divide_into_batches, the train_*
# helpers and test() are assumed to be defined elsewhere in this module.


def main(config):
    num_iters = 1
    batch_size = 4096
    data_dir = "data"
    data_limits = [0.2, 0.4, 0.6, 0.8, 1.0]
    iter_train_files, iter_test_files = split_train_test(data_dir, num_iters)
    model_names = ["rf", "mlp", "knn", "svm"]
    for limit in data_limits:
        model_bench_marks = {
            model: {
                "accuracies": [],
                "prediction_times": [],
                "training_times": []
            }
            for model in model_names
        }
        print("Training and testing with limit {}".format(limit))
        for i in range(num_iters):
            train_files, test_files = iter_train_files[i], iter_test_files[i]
            utils.save_list_as_text(test_files, "test_files.txt")
            x_train, y_train, x_dev, y_dev = preprocess_data(
                train_files, config)
            # Shuffle the training data (train_test_split shuffles by
            # default); the tiny 0.0001 split is discarded.
            x_train, _, y_train, _ = train_test_split(x_train, y_train,
                                                      test_size=0.0001)
            limited_train_size = int(len(x_train) * limit)
            limited_test_size = int(len(x_dev) * limit)
            x_train = x_train[:limited_train_size]
            y_train = y_train[:limited_train_size]
            x_dev = x_dev[:limited_test_size]
            y_dev = y_dev[:limited_test_size]
            x_train_batches, y_train_batches = divide_into_batches(
                x_train, y_train, batch_size)
            x_dev_batches, y_dev_batches = divide_into_batches(
                x_dev, y_dev, batch_size)
            for model_name in model_names:
                start_train_time = time.time()
                if model_name == "rf":
                    trained_model, scaler = train_rf_batches(
                        x_train_batches, y_train_batches, x_dev_batches,
                        y_dev_batches)
                elif model_name == "mlp":
                    trained_model, scaler = train_mlp(x_train, y_train, x_dev,
                                                      y_dev)
                elif model_name == "knn":
                    trained_model, scaler = train_knn(x_train, y_train, x_dev,
                                                      y_dev)
                elif model_name == "svm":
                    trained_model, scaler = train_svm(x_train, y_train, x_dev,
                                                      y_dev)
                else:
                    raise ValueError("Unsupported model {}".format(model_name))
                train_time = time.time() - start_train_time
                iter_accuracy, iter_prediction_time = test(
                    trained_model, scaler, test_files, config)
                model_bench_marks[model_name]["accuracies"].append(
                    iter_accuracy)
                model_bench_marks[model_name]["prediction_times"].append(
                    iter_prediction_time)
                model_bench_marks[model_name]["training_times"].append(
                    train_time)
                print("Total training time {}".format(train_time))
        # Replace each model's raw lists with their means across iterations.
        for model in model_names:
            for key in ("accuracies", "prediction_times", "training_times"):
                model_bench_marks[model][key] = float(
                    np.mean(model_bench_marks[model][key]))
        with open("benchmark_{}.json".format(limit), "w") as f:
            json.dump(model_bench_marks, f)
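divide_into_batches is not shown in this example; a minimal sketch consistent with how it is called here (parallel fixed-size chunks of features and labels) could be:

def divide_into_batches(x, y, batch_size):
    # Split x and y into parallel chunks of at most batch_size rows.
    x_batches = [x[i:i + batch_size] for i in range(0, len(x), batch_size)]
    y_batches = [y[i:i + batch_size] for i in range(0, len(y), batch_size)]
    return x_batches, y_batches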
Example no. 6
import numpy
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn import metrics

import feature_selection
import models
import utils

# `args` and `penalty_weights` are assumed to be defined at module level.


def main():
    (train_features, train_labels, test_features, test_labels, class_values,
     class_names, feature_label_names) = utils.prepare_data(
         args.input_filename, args.label_column, args.train_size,
         args.test_size, args.imbalanced_data)
    # Now that the data is limited to the requested train size, scale it,
    # since KNN requires scaled features.
    (train_features, test_features) = utils.scale_data(train_features,
                                                       test_features,
                                                       args.scaling_method)

    # feature selection if requested
    if args.feature_selection_algo:
        feature_selector_obj = feature_selection.feature_selector(
            args.evaluation, train_features, train_labels, feature_label_names,
            -1, penalty_weights, args.feature_selection_algo, args.num_jobs)
        train_features = feature_selector_obj.transform(train_features)
        test_features = feature_selector_obj.transform(test_features)
        print "Selected " + str(
            len(feature_selector_obj.get_selected_features())) + " features"
        print "Top 10 features: " + str(
            feature_selector_obj.get_top_features(10))

    model = models.train_knn(train_features, train_labels,
                             args.skip_grid_search, args.evaluation,
                             args.num_jobs, args.num_neighbors, args.weights,
                             args.algorithm, args.metric)

    # Predict test and report full stats
    y_true, y_pred = test_labels, model.predict(test_features)
    print("\n*****************************\n")
    print(
        'MAE: ' + str(
            metrics.mean_absolute_error(
                y_true, y_pred, multioutput='uniform_average')))
    print(
        'MSE: ' + str(
            metrics.mean_squared_error(
                y_true, y_pred, multioutput='uniform_average')))

    print('Classification report:')
    print(
        metrics.classification_report(y_true, y_pred, labels=class_values,
                                      target_names=class_names))
    print('Precision Recall')
    print(
        metrics.precision_recall_fscore_support(y_true,
                                                y_pred,
                                                labels=class_values,
                                                average='weighted'))

    # print and plot confusion matrix
    print('Confusion Matrix Without Normalization')
    numpy.set_printoptions(precision=2)
    cm = metrics.confusion_matrix(y_true, y_pred, labels=class_values)
    print(cm)
    print('Confusion Matrix With Normalization')
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, numpy.newaxis]
    print(cm_normalized)

    plt.figure()
    plt.subplot(2, 1, 1)
    utils.plot_confusion_matrix(cm, class_names,
                                'Unnormalized confusion matrix')

    # Normalize the confusion matrix by row (i.e by the number of samples
    # in each class)
    plt.subplot(2, 1, 2)
    utils.plot_confusion_matrix(cm_normalized, class_names,
                                'Normalized confusion matrix')

    pdf = PdfPages(args.output_figure + '.pdf')
    plt.savefig(pdf, format='pdf')
    pdf.close()