Example #1
def decision_tree_breast_cancer():
    # Use grid search to test max_depth values from 1 to 99.
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data(
    )
    start_time = datetime.now()
    estimator = DecisionTreeClassifier()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    max_depth = range(1, 100, 1)
    # return_train_score=True keeps mean_train_score in cv_results_ for the depth plot below.
    classifier = GridSearchCV(estimator=estimator,
                              cv=cross_validation,
                              param_grid=dict(max_depth=max_depth),
                              return_train_score=True)
    classifier.fit(train_features, train_labels.ravel())
    end_time = datetime.now()
    title = 'Decision Tree (max_depth = %s)' % (
        classifier.best_estimator_.max_depth)
    # Plot the learning curve for the tuned tree selected by the grid search.
    plot_learning_curve(classifier.best_estimator_,
                        title,
                        train_features,
                        train_labels.ravel(),
                        cv=cross_validation)
    plt.savefig('dtree_breast_cancer_trial_1.png')

    # Test performance on the optimal max depth
    optimal_depth = classifier.best_estimator_.max_depth
    estimator = DecisionTreeClassifier(max_depth=optimal_depth)
    estimator.fit(train_features, train_labels.ravel())
    total_time_taken = str(end_time - start_time)
    score = classifier.score(test_features, test_labels.ravel())
    train_accuracy = accuracy_score(train_labels,
                                    classifier.predict(train_features))
    cross_validation_accuracy = cross_val_score(classifier,
                                                train_features,
                                                train_labels,
                                                cv=7).mean()
    test_accuracy = accuracy_score(test_labels,
                                   classifier.predict(test_features))
    results = classifier.cv_results_
    with open("results/dtree_breast_cancer.txt", 'w') as file:
        file.write("Decision Tree with Breast Cancer Dataset\n\n")
        file.write("Optimal Depth: " + str(optimal_depth) + "\n\n")
        file.write("CV Results:\n\n" + str(results) + "\n\n")
        file.write("Feature Importance: " +
                   str(estimator.feature_importances_) + "\n\n")
        file.write("Score: " + str(score) + "\n\n")
        file.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        file.write("Cross Validation Accuracy: " +
                   str(cross_validation_accuracy) + "\n\n")
        file.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        file.write("Total Time Taken: " + strftime(total_time_taken))

    train_accuracy = results['mean_train_score']
    test_accuracy = results['mean_test_score']

    plot_dtree_depth_performance("Breast Cancer", max_depth, train_accuracy,
                                 test_accuracy)
    plt.savefig("dtree_breast_cancer_depth.png")
Example #2
def boosting_balance_scale():
    train_features, train_labels, test_features, test_labels = get_balance_data(
    )
    start_time = datetime.now()
    estimator = AdaBoostClassifier()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    num_estimators = range(1, 100, 1)
    classifier = GridSearchCV(estimator=estimator,
                              cv=cross_validation,
                              param_grid=dict(n_estimators=num_estimators),
                              return_train_score=True)
    classifier.fit(train_features, train_labels.ravel())
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)
    estimator = AdaBoostClassifier(
        n_estimators=classifier.best_estimator_.n_estimators)
    estimator.fit(train_features, train_labels.ravel())
    print("Score: ", classifier.score(test_features, test_labels.ravel()))
    title = "Ada Boost (num_estimators = %s)" % str(
        classifier.best_estimator_.n_estimators)
    plot_learning_curve(estimator,
                        title,
                        train_features,
                        train_labels.ravel(),
                        cv=cross_validation)
    plt.savefig("adaboost_balance_scale.png")
    train_accuracy = accuracy_score(train_labels,
                                    classifier.predict(train_features))
    cross_validation_accuracy = cross_val_score(classifier,
                                                train_features,
                                                train_labels,
                                                cv=7).mean()
    test_accuracy = accuracy_score(test_labels,
                                   classifier.predict(test_features))
    results = classifier.cv_results_
    with open("results/adaboost_balance_scale.txt", "w") as file:
        file.write("Adaboost with Balance Scale Dataset\n\n")
        file.write("Optimal Number of Estimators: " +
                   str(classifier.best_estimator_.n_estimators))
        file.write("Grid Scores:\n\n" + str(results))
        file.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        file.write("Cross Validation Accuracy: " +
                   str(cross_validation_accuracy) + "\n\n")
        file.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        file.write("Total Time Taken: " + strftime(total_time_taken))

    train_accuracy = results['mean_train_score']
    test_accuracy = results['mean_test_score']

    plot_boosting_performance("Balance Scale", num_estimators, train_accuracy,
                              test_accuracy)
    plt.savefig("adaboost_balance_num_estimators.png")
Example #3
def knn_breast_cancer():
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data(
    )
    start_time = datetime.now()
    estimator = KNeighborsClassifier()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    k = range(1, 21, 1)
    classifier = GridSearchCV(estimator=estimator,
                              cv=cross_validation,
                              param_grid=dict(n_neighbors=k),
                              return_train_score=True)
    classifier.fit(train_features, train_labels.ravel())
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)
    estimator = KNeighborsClassifier(
        n_neighbors=classifier.best_estimator_.n_neighbors)
    estimator.fit(train_features, train_labels.ravel())
    print("Score: ", classifier.score(test_features, test_labels.ravel()))
    title = 'KNN (k = %s)' % (classifier.best_estimator_.n_neighbors)
    plot_learning_curve(estimator,
                        title,
                        train_features,
                        train_labels.ravel(),
                        cv=cross_validation)
    plt.savefig("knn_breast_cancer.png")
    train_accuracy = accuracy_score(train_labels,
                                    classifier.predict(train_features))
    cross_validation_accuracy = cross_val_score(classifier,
                                                train_features,
                                                train_labels,
                                                cv=7).mean()
    test_accuracy = accuracy_score(test_labels,
                                   classifier.predict(test_features))
    results = classifier.cv_results_
    with open("results/knn_breast_cancer.txt", "w") as file:
        file.write("KNN with Breast Cancer Dataset\n\n")
        file.write("Optimal Number of Estimators: " +
                   str(classifier.best_estimator_.n_neighbors))
        file.write("Grid Scores:\n\n" + str(results))
        file.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        file.write("Cross Validation Accuracy: " +
                   str(cross_validation_accuracy) + "\n\n")
        file.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        file.write("Total Time Taken: " + strftime(total_time_taken))

    train_accuracy = results['mean_train_score']
    test_accuracy = results['mean_test_score']
    plot_knn_k("Breast Cancer", k, train_accuracy, test_accuracy)
    plt.savefig("knn_breast_cancer_k.png")
Example #4
def run_analysis(estimator,
                 X_train,
                 Y_train,
                 X_test,
                 Y_test,
                 dataset,
                 modelType,
                 analysisRound,
                 validationXLabel,
                 param,
                 param_values,
                 x_labels=[]):

    # Train.
    estimator.fit(X_train, Y_train.ravel())

    # Test.
    predictions = estimator.predict(X_test)

    # Confusion matrix.
    tn, fp, fn, tp = metrics.confusion_matrix(Y_test.ravel(),
                                              predictions).ravel()

    print(metrics.confusion_matrix(Y_test.ravel(), predictions))
    print("Accuracy: " +
          str(metrics.accuracy_score(Y_test.ravel(), predictions)))
    print("Precision: " +
          str(metrics.precision_score(Y_test.ravel(), predictions)))
    print("Recall: " + str(metrics.recall_score(Y_test.ravel(), predictions)))

    # Learning Curve Analysis.
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=10)
    plotting.plot_learning_curve(estimator,
                                 X_train,
                                 Y_train.ravel(),
                                 dataset,
                                 modelType,
                                 analysisRound,
                                 ylim=(0.7, 1.01),
                                 cv=cv,
                                 n_jobs=4)

    # Model Complexity / Validation Curve Analysis.
    plotting.plot_validation_curve(estimator, X_train, Y_train.ravel(),
                                   dataset, modelType, analysisRound,
                                   validationXLabel, param, param_values,
                                   x_labels)
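
A hypothetical invocation, assuming a binary-classification dataset already split into train/test arrays (the dataset name, round number, and axis label are placeholders):

from sklearn.tree import DecisionTreeClassifier

# X_train, Y_train, X_test, Y_test are assumed to be pre-split numpy arrays for a
# binary problem (the confusion matrix above is unpacked as tn, fp, fn, tp).
run_analysis(DecisionTreeClassifier(random_state=10),
             X_train, Y_train, X_test, Y_test,
             dataset='credit_default',
             modelType='decision_tree',
             analysisRound=1,
             validationXLabel='max_depth',
             param='max_depth',
             param_values=range(1, 21))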
Example #5
def main():
    input_path = os.path.abspath(os.path.join('./data', args.dataset))
    dataset = os.path.splitext(args.dataset)[0]
    logger.info('Load {}'.format(input_path))
    params = {'test_size': 0.2, 'random_state': 1, 'cluster': 'kmeans'}
    X_train, X_test, y_train, y_test = data_loader.load(input_path, **params)
    logger.info('Split into train and test subsets: {}'.format(params))

    params_path = os.path.abspath(os.path.join('./params', args.params))
    with open(params_path) as file_:
        params = yaml.load(file_, Loader=yaml.SafeLoader)
    logger.info('Load {}'.format(params_path))
    logger.info('Hyperparameters: {}'.format(params))
    models = {
        'MLP': nn.MLPClassifier,
        'CNN': nn.CNNClassifier,
        'RNN': nn.RNNClassifier
    }
    clf = models[args.model](**params)
    estimator = clf.__class__.__name__
    logger.info('Train {} on {}'.format(estimator, dataset))
    clf.fit(X_train, y_train)

    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)
    csv_log = pd.DataFrame({
        'loss': clf.loss_curve_,
        'train_score': clf.training_scores_,
        'val_score': clf.validation_scores_
    })
    csv_log_path = os.path.join(output_dir, time.strftime('%Y%m%d-%H%M%S.csv'))
    csv_log.to_csv(csv_log_path)
    logger.info('Save learning log to {}'.format(csv_log_path))

    if args.plot:
        plot_path = os.path.join(output_dir,
                                 time.strftime('%Y%m%d-%H%M%S.png'))
        plotting.plot_learning_curve(csv_log_path,
                                     '{} on {}'.format(estimator,
                                                       dataset), plot_path)
        logger.info('Save learning curves to {}'.format(plot_path))

    logger.info('Training score: {}'.format(clf.score(X_train, y_train)))
    logger.info('Testing score: {}'.format(clf.score(X_test, y_test)))
    logger.info('Done')
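
The params file is read as a flat YAML mapping and unpacked straight into the chosen classifier's constructor, so its keys must match whatever nn.MLPClassifier, nn.CNNClassifier, or nn.RNNClassifier accept (those classes are not shown). A purely hypothetical example for an MLP:

# params/mlp.yaml (illustrative only -- the real keys depend on nn.MLPClassifier)
hidden_sizes: [128, 64]
learning_rate: 0.001
batch_size: 32
max_epochs: 50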
Example #6
def svm_balance_scale():
    train_features, train_labels, test_features, test_labels = get_balance_data()
    start_time = datetime.now()
    estimator = SVC()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    kernels = ["linear", "sigmoid", "rbf"]
    classifier = GridSearchCV(
        estimator=estimator,
        cv=cross_validation,
        param_grid=dict(kernel=kernels),
        return_train_score=True)

    classifier.fit(train_features, train_labels.ravel())
    title = 'SVM (kernel = %s)' % (classifier.best_estimator_.kernel)
    estimator = SVC(kernel=classifier.best_estimator_.kernel)
    estimator.fit(train_features, train_labels.ravel())
    print("Score: ", classifier.score(test_features, test_labels.ravel()))
    plot_learning_curve(estimator, title, train_features, train_labels.ravel(), cv=cross_validation)
    plt.savefig('svm_balance_scale.png')
    train_accuracy = accuracy_score(train_labels, classifier.predict(train_features))
    cross_validation_accuracy = cross_val_score(classifier, train_features, train_labels, cv=7).mean()
    test_accuracy = accuracy_score(test_labels, classifier.predict(test_features))
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)
    results = classifier.cv_results_
    with open("results/svm_balance_scale.txt", 'w') as file:
    	file.write("SVM with Balance Scale Dataset\n\n")
    	file.write("Best Kernel: " + str(classifier.best_estimator_.kernel) + "\n\n")
    	file.write("Grid Scores:\n\n" + str(results))
    	file.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
    	file.write("Cross Validation Accuracy: " + str(cross_validation_accuracy) + "\n\n")
    	file.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
    	file.write("Total Time Taken: " + strftime(total_time_taken))

    train_accuracy = results['mean_train_score']
    test_accuracy = results['mean_test_score']

    plot_svm_performance("Balance Scale", kernels, train_accuracy, test_accuracy)
    plt.savefig("svm_balance_scale_kernels.png")
Example #7
def sarsa_lambda(l=0.9,
                 max_episodes=1000,
                 policy=policies.epsilon_greedy,
                 n_zero=100,
                 gamma=1,
                 plot_learning_curve=True,
                 multiproc=True):
    """ Applies eligibility trace version of Sarsa to the game Easy21

    :param l: lambda parameter
    :param max_episodes: stop learning after this many episodes
    :param policy: exploration strategy to use
    :param n_zero: epsilon greedy constant (only applicable if epsilon greedy policy is used)
    :param gamma: discounting rate
    :param plot_learning_curve: whether to turn on plotting of learning curve for lambda = 0 and 1
    :param multiproc: whether to use multiprocessing when doing plots or not (warning! turn off if running multiple
        algorithms on mac or windows simultaneously)
    :return: value function after max_episodes
    """
    # (player, dealer, action) key
    value_function = defaultdict(float)
    # (player, dealer) key
    counter_state = defaultdict(int)
    # (player, dealer, action) key
    counter_state_action = defaultdict(int)
    # no. of wins to calculate the percentage of wins at the end
    wins = 0

    # learning curve plotting
    if l in {0, 1} and plot_learning_curve:
        learning_curve = []
        try:
            mc_values = pickle.load(open("Data/MC_value_function.pickle",
                                         "rb"))
        except:
            mc_values = monte_carlo(iterations=1000000)

    for episode in range(max_episodes):

        # current (player, dealer, action)
        eligibility_trace = defaultdict(float)

        # initial state, action [SA..]
        state = environment.State()
        player_current = state.player_sum
        dealer_current = state.dealer_first_card
        epsilon = n_zero / float(n_zero + counter_state[
            (player_current, dealer_current)])
        action_current = policy(epsilon, value_function, state)

        while not state.terminal:

            # update counts
            counter_state[(player_current, dealer_current)] += 1
            counter_state_action[(player_current, dealer_current,
                                  action_current)] += 1

            # take a step, get reward [..R..]
            [state, reward] = environment.step(state, action_current)
            if reward is None:
                reward = 0

            # follow up state, action [..SA]
            player_next = state.player_sum
            dealer_next = state.dealer_first_card
            epsilon = n_zero / float(n_zero +
                                     counter_state[(player_next, dealer_next)])
            action_next = policy(epsilon, value_function, state)

            delta = reward + gamma * value_function[(player_next, dealer_next, action_next)] - \
                value_function[(player_current, dealer_current, action_current)]

            alpha = 1.0 / counter_state_action[(player_current, dealer_current,
                                                action_current)]

            eligibility_trace[(player_current, dealer_current,
                               action_current)] += 1

            # update the values
            for key in value_function:
                value_function[key] += alpha * delta * eligibility_trace[key]
                eligibility_trace[key] *= gamma * l

            player_current = player_next
            dealer_current = dealer_next
            action_current = action_next

        # use it later to calculate the percentage of wins
        if reward == 1:
            wins += 1

        # get the episode MSE for plotting learning curve
        if l in {0, 1} and plot_learning_curve:
            learning_curve.append(
                (episode, utilities.calculate_mse(mc_values, value_function)))

    # plot learning curve
    if l in {0, 1} and plot_learning_curve:
        if multiproc:
            # create a new process so computation can continue after plotting
            p = Process(target=plotting.plot_learning_curve,
                        args=(
                            learning_curve,
                            l,
                        ))
            p.start()
        else:
            plotting.plot_learning_curve(learning_curve, l)

    # get the percentage of wins
    print(float(wins) / max_episodes)
    return value_function
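
A typical driver for sarsa_lambda sweeps lambda from 0 to 1 and compares each learned value function against a Monte Carlo baseline. A minimal sketch, reusing the monte_carlo and utilities helpers the function itself depends on (the episode counts are illustrative):

# Baseline from a long Monte Carlo run, then MSE of Sarsa(lambda) against it.
mc_values = monte_carlo(iterations=1000000)
mse_per_lambda = []
for lam in [i / 10.0 for i in range(11)]:
    value_function = sarsa_lambda(l=lam, max_episodes=1000,
                                  plot_learning_curve=False)
    mse_per_lambda.append((lam, utilities.calculate_mse(mc_values, value_function)))
for lam, mse in mse_per_lambda:
    print('lambda = %.1f  MSE = %.4f' % (lam, mse))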
Example #8
def linear_function_approximation(l=0.9,
                                  max_episodes=1000,
                                  policy=policies.epsilon_greedy_lfa,
                                  n_zero=100,
                                  gamma=1,
                                  plot_learning_curve=True,
                                  multiproc=True):
    """ Value function approximation using coarse coding

    :param l: lambda parameter
    :param gamma: discounting rate
    :param max_episodes: stop learning after this many episodes
    :param policy: exploration strategy to use
    :param n_zero: epsilon greedy constant (only applicable if epsilon greedy policy is used)
    :param multiproc: whether to use multiprocessing when doing plots or not (warning! turn off if running multiple
        algorithms on mac or windows simultaneously)
    :return: value function after max_episodes
    """
    # weights vector for the state_action feature vector
    theta = np.random.random(36) * 0.2
    # random move probability
    epsilon = 0.05
    # step-size parameter
    alpha = 0.01

    # learning curve plotting
    if l in {0, 1} and plot_learning_curve:
        learning_curve = []
        try:
            mc_values = pickle.load(open("Data/MC_value_function.pickle",
                                         "rb"))
        except:
            mc_values = monte_carlo(iterations=1000000)

    for episode in range(max_episodes):

        # key is state_action feature vector
        eligibility_trace = np.zeros(36)

        # initial state, action [SA..], and set of features
        state = environment.State()
        # calculate features for the given state
        state_features_current = utilities.get_state_features(state)
        # get action from this state
        q_a_current, action_current = policy(epsilon, theta,
                                             state_features_current)
        # calculate final state, action feature vector
        features_current = utilities.get_state_action_features(
            state_features_current, action_current)

        while not state.terminal:

            # update eligibility trace (accumulating)
            eligibility_trace = np.add(eligibility_trace, features_current)

            # take a step, get reward [..R..]
            [state, reward] = environment.step(state, action_current)
            if reward is None:
                reward = 0

            # follow up state, action [..SA]
            state_features_next = utilities.get_state_features(state)
            q_a_next, action_next = policy(epsilon, theta, state_features_next)
            features_next = utilities.get_state_action_features(
                state_features_next, action_next)

            # calculate state value difference
            delta = reward + gamma * q_a_next - q_a_current
            # update weights
            theta = np.add(theta, alpha * delta * eligibility_trace)
            # update trace
            eligibility_trace *= gamma * l

            features_current = features_next
            action_current = action_next

        # calculate value function
        value_function = defaultdict(float)
        for player in range(1, 22):
            for dealer in range(1, 11):
                for action in [0, 1]:
                    s = environment.State(dealer, player)
                    phi = utilities.get_state_action_features(
                        utilities.get_state_features(s), action)
                    value_function[(s.player_sum, s.dealer_first_card,
                                    action)] = phi.dot(theta)

        # get the episode MSE for plotting learning curve
        if l in {0, 1} and plot_learning_curve:
            learning_curve.append(
                (episode, utilities.calculate_mse(mc_values, value_function)))

    # plot learning curves
    if l in {0, 1} and plot_learning_curve:
        if multiproc:
            # create a new process so computation can continue after plotting
            p = Process(target=plotting.plot_learning_curve,
                        args=(
                            learning_curve,
                            l,
                        ))
            p.start()
        else:
            plotting.plot_learning_curve(learning_curve, l)

    return value_function
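
utilities.get_state_features and get_state_action_features are not shown. The 36-element theta suggests the usual Easy21 coarse coding of 3 overlapping dealer intervals x 6 player intervals x 2 actions; a sketch under that assumption:

import numpy as np

# Assumed coarse-coding cuboids: 3 dealer intervals x 6 player intervals (overlapping).
DEALER_INTERVALS = [(1, 4), (4, 7), (7, 10)]
PLAYER_INTERVALS = [(1, 6), (4, 9), (7, 12), (10, 15), (13, 18), (16, 21)]


def get_state_features(state):
    # 18 binary indicators, one per (dealer interval, player interval) pair.
    features = np.zeros((len(DEALER_INTERVALS), len(PLAYER_INTERVALS)))
    for i, (d_lo, d_hi) in enumerate(DEALER_INTERVALS):
        for j, (p_lo, p_hi) in enumerate(PLAYER_INTERVALS):
            if d_lo <= state.dealer_first_card <= d_hi and p_lo <= state.player_sum <= p_hi:
                features[i, j] = 1.0
    return features.flatten()


def get_state_action_features(state_features, action):
    # Stack the 18 state features per action (0 = hit, 1 = stick) -> 36 features.
    features = np.zeros(2 * state_features.size)
    offset = action * state_features.size
    features[offset:offset + state_features.size] = state_features
    return features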
Example #9
def main(args):
    path = os.getcwd()
    parent = os.path.dirname(path)
    k = 5
    logreg = make_sgd_classifier()
    if args.plot:
        x, y, x_transformer = load_data(
            os.path.join(parent, 'data', args.training_data))
        # Use n_jobs=-1 to make use of all cores.
        plt = plot_learning_curve(
            logreg,
            'Logistic regression: Accuracy / Training example',
            x,
            y.argmax(axis=1),
            cv=k,
            n_jobs=-1)
        plt.show()
    elif args.test_learning_rate:
        x, y, x_transformer = load_data(
            os.path.join(parent, 'data', args.training_data))
        eta0s = [0.00001, 0.00003, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03]
        for eta0 in eta0s:
            logreg_eta = make_sgd_classifier(eta0=eta0)
            results = cross_validate(logreg_eta,
                                     x,
                                     y.argmax(axis=1),
                                     cv=k,
                                     n_jobs=-1,
                                     return_train_score=True)
            train_score = numpy.mean(results['train_score'])
            test_score = numpy.mean(results['test_score'])
            print(
                f'Eta0 {eta0}; Train score {train_score}; Test score {test_score}'
            )
    elif args.test_k_fold:
        x, y, x_transformer = load_data(
            os.path.join(parent, 'data', args.training_data))
        ks = [3, 5, 7, 10]
        for k in ks:
            logreg = make_sgd_classifier()
            results = cross_validate(logreg,
                                     x,
                                     y.argmax(axis=1),
                                     cv=k,
                                     n_jobs=-1,
                                     return_train_score=True)
            train_score = numpy.mean(results['train_score'])
            test_score = numpy.mean(results['test_score'])
            print(f'K {k}; Train score {train_score}; Test score {test_score}')
    else:
        train_x, train_y_non_one_hot, validation_x, validation_y, x_transformer = load_data_and_split(
            os.path.join(parent, 'data', args.training_data), k=k)
        train_y = one_hot(train_y_non_one_hot)
        if args.load_model:
            logreg = joblib.load(args.load_model)
        else:
            logreg.fit(train_x, train_y.argmax(axis=1))
        print('Train score: {}'.format(
            logreg.score(train_x, train_y.argmax(axis=1))))
        print('Validation score: {}'.format(
            logreg.score(validation_x, validation_y.argmax(axis=1))))

        if args.predict or args.predict_proba:
            predict_data, y, timestamps = load_prediction_data(
                args.training_data, args.predict, x_transformer)

            if args.predict:
                predictions = logreg.predict(predict_data)
                results = pd.DataFrame(data={
                    'label': y,
                    'prediction': predictions
                },
                                       index=timestamps)
                print(results)

            if args.predict_proba:
                predictions = logreg.predict_proba(predict_data)
                results = pd.DataFrame(data=predictions, index=timestamps)
                print(results)

        if args.confidence:
            probabilities = logreg.predict_proba(train_x)
            predictions = logreg.predict(train_x)
            probas_predictions_labels = numpy.concatenate(
                (probabilities, predictions.reshape(-1,
                                                    1), train_y_non_one_hot),
                axis=1)
            df = pd.DataFrame(
                probas_predictions_labels,
                columns=['a0', 'a1', 'a2', 'a3', 'prediction', 'label'])
            correct_predictions = df.loc[df['prediction'] == df['label']]
            highest_probas_correct = correct_predictions[[
                'a0', 'a1', 'a2', 'a3'
            ]].max(axis=1)
            highest_probas_correct_avg = numpy.average(highest_probas_correct)
            highest_probas_correct_std = numpy.std(highest_probas_correct)

            highest_probas = numpy.max(probabilities, axis=1)
            highest_probas_avg = numpy.average(highest_probas)
            highest_probas_std = numpy.std(highest_probas)

            print('Highest avg. probability:', highest_probas_avg)
            print('Highest probability std:', highest_probas_std)
            print('Highest correct probabilities avg:',
                  highest_probas_correct_avg)
            print('Highest correct probabilities std:',
                  highest_probas_correct_std)

        if args.save_model:
            model_directory = os.path.join(parent, 'trained_models')
            if not os.path.exists(model_directory):
                os.makedirs(model_directory)
            joblib.dump(
                logreg,
                os.path.join(model_directory, args.save_model + '.joblib'))
Example #10
	cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)

	# Now for graphs
	title = "Learning Curves (" + name + ")"
	print("Creating Learning curves for %s" % name)

	# Cross validation with 100 iterations to get smoother mean test and train
	# score curves, each time with 20% data randomly selected as a validation set.
	cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

	# Get plot for this model and data.
	plot_learning_curve(model, title, X, y, ylim=(0.3, 1.01), cv=cv, n_jobs=1)

	plt.show()
#
#
#
# ## From the models above, we choose DecisionTreeClassifier.
# estimator = DecisionTreeClassifier(random_state=0)
# estimator.fit(X_train, Y_train)
#
# # The decision estimator has an attribute called tree_  which stores the entire
# # tree structure and allows access to low level attributes. The binary tree
# # tree_ is represented as a number of parallel arrays. The i-th element of each
# # array holds information about the node `i`. Node 0 is the tree's root. NOTE:
# # Some of the arrays only apply to either leaves or split nodes, resp. In this
# # case the values of nodes of the other type are arbitrary!
Example #11
                r = env.act(env.get_action_set()[a])
                next_s = env.get_current_state()
                next_a = utils.epsilon_greedy(q_values[next_s])

                td_error = r + gamma * q_values[next_s][next_a] - q_values[s][a]
                q_values[s][a] = q_values[s][a] + step_size * td_error

                s = next_s
                a = next_a
                num_steps += 1
            env.reset()
            num_steps_episode[seed].append(num_steps)

        # Let me plot the max q-values for this seed:
        max_values = []
        for i in range(num_states):
            max_values.append(np.max(q_values[i]))
        plotting.plot_basis_function(args, num_rows, num_cols,
                                     np.array(max_values),
                                     'max_q_seed_' + str(seed))

        # Let me plot the final policy for this seed:
        policy = []
        for i in range(num_states):
            policy.append(np.argmax(q_values[i]))
        plotting.plot_policy(env, args, num_rows, num_cols, policy,
                             'policy_seed_' + str(seed))

    # Finally, plot the overall results as an example of how to call the plotting functions:
    plotting.plot_learning_curve(num_steps_episode, args.output)
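
utils.epsilon_greedy above selects an action from the Q-values of a single state; a minimal sketch, assuming a fixed exploration rate:

import numpy as np


def epsilon_greedy(action_values, epsilon=0.1):
    # Explore uniformly with probability epsilon, otherwise act greedily.
    if np.random.random() < epsilon:
        return np.random.randint(len(action_values))
    return int(np.argmax(action_values))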
Example #14
                # Plot predicted vs. actual on the same plot

                x = y_test.index
                title = f'Predicted vs. Actual Days Until Watering for {plant_name} Plant'
                fig_1 = plot_time_series(x,
                                         y_pred,
                                         title,
                                         'Predicted days until watering',
                                         color='red')

                plt.scatter(x,
                            y_test,
                            figure=fig_1,
                            label='Actual days until watering',
                            color='black',
                            s=1)
                plt.legend()

                # Plot learning curve
                tscv = TimeSeriesSplit(n_splits=10)
                plot_learning_curve(reg,
                                    X,
                                    y,
                                    cv=tscv,
                                    train_sizes=np.linspace(0.1, 1.0, 10),
                                    scoring='neg_mean_squared_error')

                # TODO - Forward-looking sunlight prediction - Connect to weather prediction API (sunny/cloudy) for light predictions? Correlate to light detected in training data

    plt.show()
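
plot_time_series is assumed to return a matplotlib figure onto which the scatter of actual values is then drawn; a minimal sketch consistent with the call above (axis labels are placeholders):

import matplotlib.pyplot as plt


def plot_time_series(x, y, title, label, color='blue'):
    # New figure with one labelled line; the caller overlays additional series on it.
    fig = plt.figure()
    plt.plot(x, y, label=label, color=color)
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel('Days until watering')
    return fig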
Example #15
def basicResults(clfObj,
                 trgX,
                 trgY,
                 tstX,
                 tstY,
                 params,
                 clf_type=None,
                 dataset=None,
                 feature_names=None,
                 scorer='accuracy',
                 complexity_curve=False,
                 complexity_params=None,
                 clf_name=""):
    np.random.seed(55)
    if clf_type is None or dataset is None:
        raise ValueError("clf_type and dataset must both be provided")
    print("Starting grid search--------")
    cv = ms.GridSearchCV(clfObj,
                         n_jobs=1,
                         param_grid=params,
                         refit=True,
                         verbose=10,
                         cv=5,
                         scoring=scorer)
    cv.fit(trgX, trgY)

    # export_decision_tree(cv, feature_names, dataset)

    print("Ended     grid search--------")
    regTable = pd.DataFrame(cv.cv_results_)
    regTable.to_csv('./output/{}_{}_reg.csv'.format(clf_type, dataset),
                    index=False)
    test_score = cv.score(tstX, tstY)

    test_y_predicted = cv.predict(tstX)

    # PLOT Confusion Matrix
    cnf_matrix = confusion_matrix(tstY, test_y_predicted)
    plt = plot_confusion_matrix(cnf_matrix,
                                title='Confusion Matrix: {} - {}'.format(
                                    clf_type, dataset))
    OUTPUT_DIRECTORY = "output"
    plt.savefig('{}/images/{}_{}_CM.png'.format(OUTPUT_DIRECTORY, clf_type,
                                                dataset),
                format='png',
                dpi=150,
                bbox_inches='tight')

    with open('./output/test results.csv', 'a') as f:
        f.write('{},{},{},{}\n'.format(clf_type, dataset, test_score,
                                       cv.best_params_))
    N = trgY.shape[0]

    # Plot Learning Curve
    # curve = ms.learning_curve(cv.best_estimator_,trgX,trgY,cv=3,train_sizes=np.linspace(0.1, 1.0, 20),verbose=10,scoring=scorer)
    curve = ms.learning_curve(cv.best_estimator_,
                              trgX,
                              trgY,
                              cv=3,
                              train_sizes=np.linspace(0.2, 1.0, 10),
                              verbose=10,
                              scoring=scorer)
    curve_train_scores = pd.DataFrame(index=curve[0], data=curve[1])
    curve_test_scores = pd.DataFrame(index=curve[0], data=curve[2])
    curve_train_scores.to_csv('./output/{}_{}_LC_train.csv'.format(
        clf_type, dataset))
    curve_test_scores.to_csv('./output/{}_{}_LC_test.csv'.format(
        clf_type, dataset))

    plt = plot_learning_curve('Learning Curve: {} - {}'.format(
        clf_type, dataset),
                              curve[0],
                              curve[1],
                              curve[2],
                              y_label=scorer)
    plt.savefig('{}/images/{}_{}_LC.png'.format(OUTPUT_DIRECTORY, clf_type,
                                                dataset),
                format='png',
                dpi=150)

    if complexity_curve:
        make_complexity_curve(trgX,
                              trgY,
                              complexity_params['name'],
                              complexity_params['display_name'],
                              complexity_params['values'],
                              clfObj,
                              clf_name=clf_name,
                              dataset=dataset,
                              dataset_readable_name=dataset)
        print("Drew complexity curve")

    return cv
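
A hypothetical call, with placeholder training/test arrays and a small decision-tree grid (the parameter names follow scikit-learn's DecisionTreeClassifier; 'DT' and 'wine' are illustrative labels):

from sklearn.tree import DecisionTreeClassifier

# trgX, trgY, tstX, tstY are assumed to be pre-split numpy arrays.
param_grid = {'max_depth': [2, 4, 6, 8, 10],
              'min_samples_leaf': [1, 5, 10]}
cv = basicResults(DecisionTreeClassifier(random_state=55),
                  trgX, trgY, tstX, tstY,
                  params=param_grid,
                  clf_type='DT',
                  dataset='wine',
                  scorer='accuracy')
print(cv.best_params_)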
Example #16
def iterationLC(clfObj,
                trgX,
                trgY,
                tstX,
                tstY,
                params,
                clf_type=None,
                dataset=None,
                dataset_readable_name=None,
                balanced_dataset=False,
                x_scale='linear',
                seed=55,
                threads=1,
                scorer='accuracy'):
    if not dataset_readable_name:
        dataset_readable_name = dataset

    np.random.seed(50)
    if clf_type is None or dataset is None:
        print("clf_type = ", clf_type)
        print("dataset = ", dataset)
        raise ValueError("clf_type and dataset must both be provided")
    cv = ms.GridSearchCV(clfObj,
                         n_jobs=1,
                         param_grid=params,
                         refit=True,
                         verbose=10,
                         cv=5,
                         scoring=scorer)
    cv.fit(trgX, trgY)
    regTable = pd.DataFrame(cv.cv_results_)
    regTable.to_csv('./output/ITER_base_{}_{}.csv'.format(clf_type, dataset),
                    index=False)
    d = defaultdict(list)
    name = list(params.keys())[0]
    for value in list(params.values())[0]:
        d['param_{}'.format(name)].append(value)
        clfObj.set_params(**{name: value})
        clfObj.fit(trgX, trgY)
        pred = clfObj.predict(trgX)
        d['train acc'].append(balanced_accuracy(trgY, pred))
        # The model is already fitted above; predict on the test set directly.
        pred = clfObj.predict(tstX)
        d['test acc'].append(balanced_accuracy(tstY, pred))
        print(value)
    d = pd.DataFrame(d)
    d.to_csv('./output/ITERtestSET_{}_{}.csv'.format(clf_type, dataset),
             index=False)

    plt = plot_learning_curve('{} - {} ({})'.format(clf_type,
                                                    dataset_readable_name,
                                                    name),
                              d['param_{}'.format(name)],
                              d['train acc'],
                              d['test acc'],
                              multiple_runs=False,
                              x_scale=x_scale,
                              x_label='Value',
                              y_label=scorer)
    plt.savefig('{}/images/{}_{}_ITER_LC.png'.format(OUTPUT_DIRECTORY,
                                                     clf_type, dataset),
                format='png',
                dpi=150)

    return cv