import random as rdm
import matplotlib.pyplot as plt

# `ph` (benchmark helpers), `train_and_predict`, `digit_specific_collecting`,
# `digit_specific_true_labels`, `digits`, and the module-level data
# (`nums`, `training`, `validation`, `true_labels`) are defined elsewhere
# in this module.


def run_problem1():
    """Train on increasingly large random subsets of the training data and
    plot validation error rate against training-set size."""
    predicted_labels = []
    for n in nums:
        # Draw a random subset of n training examples.
        training_set = rdm.sample(training, n)
        predicted_labels.append(
            train_and_predict(training_set, validation,
                              digit_specific_collecting)[1])
    error_rates = [ph.benchmark(predicted, true_labels)
                   for predicted in predicted_labels]
    plt.plot(nums, error_rates)
    plt.xlabel("Number of Training Data")
    plt.ylabel('Error Rate')
    plt.title('Number of Training Data vs. Error Rate')
    plt.show()
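
# `ph.benchmark` is assumed here to return the misclassification rate: the
# fraction of predictions that disagree with the true labels. A minimal
# stand-in with that behaviour (hypothetical, for reference only):
def benchmark_sketch(predicted_labels, true_labels):
    errors = sum(1 for p, t in zip(predicted_labels, true_labels) if p != t)
    return errors / float(len(true_labels))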
def run_k_folds(k_folds,
                C=1,
                specific_collecting=digit_specific_collecting,
                specific_true_labels=digit_specific_true_labels,
                dataset=digits,
                train_labels="train_labels",
                train_objects="train_images",
                k=10):
    """Run k-fold cross-validation for a given penalty C and return the
    classifier fitted on the final fold together with the mean error rate
    across all k folds."""
    error_rate_k_folds = 0
    for i in range(k):
        # Hold out fold i for validation; train on the remaining k-1 folds.
        validation_fold = k_folds[i]
        training_folds = []
        for j in range(k):
            if j != i:
                training_folds += k_folds[j]
        svc, predicted_labels_k_folds = train_and_predict(
            training_folds, validation_fold, specific_collecting, C, dataset,
            train_labels, train_objects)
        true_labels_k_folds = specific_true_labels(validation_fold)
        error_rate = ph.benchmark(predicted_labels_k_folds,
                                  true_labels_k_folds)
        print("Error Rate for C={} is {}".format(C, error_rate))
        error_rate_k_folds += error_rate
    return svc, error_rate_k_folds / float(k)
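
# A sketch of how run_k_folds might be driven: split a shuffled copy of the
# training data into k equal folds, then sweep candidate C values and keep
# the one with the lowest mean cross-validation error. The C grid and the
# helper names below are assumptions for illustration, not part of the
# original module.
def make_folds(data, k=10):
    # Shuffle a copy so fold membership is random, then cut into k chunks.
    shuffled = list(data)
    rdm.shuffle(shuffled)
    fold_size = len(shuffled) // k
    return [shuffled[i * fold_size:(i + 1) * fold_size] for i in range(k)]


def sweep_C(training, candidate_Cs=(0.01, 0.1, 1, 10, 100), k=10):
    k_folds = make_folds(training, k)
    results = []
    for C in candidate_Cs:
        _, mean_error = run_k_folds(k_folds, C=C, k=k)
        results.append((mean_error, C))
    return min(results)  # (lowest mean error, its C)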