def decision_tree_breast_cancer():
    """Tune and evaluate a decision tree on the breast cancer dataset.

    Grid-searches ``max_depth`` over 1..99 using a shuffled 80/20 split,
    plots a learning curve for a tree limited to the best depth, writes a
    text report to ``results/dtree_breast_cancer.txt`` and saves two figures
    in the current working directory. Returns nothing.
    """
    train_features, train_labels, test_features, test_labels = (
        get_breast_cancer_data())
    # Flatten the label arrays once; every call below uses the 1-D form.
    train_targets = train_labels.ravel()
    test_targets = test_labels.ravel()

    start_time = datetime.now()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    max_depth = range(1, 100, 1)
    # return_train_score=True is needed for cv_results_['mean_train_score'],
    # which is read at the end of this function; scikit-learn omits the
    # training scores by default.
    classifier = GridSearchCV(estimator=DecisionTreeClassifier(),
                              cv=cross_validation,
                              param_grid=dict(max_depth=max_depth),
                              return_train_score=True)
    classifier.fit(train_features, train_targets)
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)

    # Build the depth-limited tree *before* plotting so that the learning
    # curve matches the depth announced in its title.
    optimal_depth = classifier.best_estimator_.max_depth
    estimator = DecisionTreeClassifier(max_depth=optimal_depth)
    title = 'Decision Tree (max_depth = %s)' % optimal_depth
    plot_learning_curve(estimator, title, train_features, train_targets,
                        cv=cross_validation)
    plt.savefig('dtree_breast_cancer_trial_1.png')

    # Fitted separately only to expose feature_importances_ in the report.
    estimator.fit(train_features, train_targets)

    score = classifier.score(test_features, test_targets)
    train_accuracy = accuracy_score(train_targets,
                                    classifier.predict(train_features))
    # NOTE: passing the GridSearchCV object re-runs the whole grid search in
    # each of the 7 folds (nested cross-validation), which is expensive.
    cross_validation_accuracy = cross_val_score(
        classifier, train_features, train_targets, cv=7).mean()
    test_accuracy = accuracy_score(test_targets,
                                   classifier.predict(test_features))
    results = classifier.cv_results_

    with open("results/dtree_breast_cancer.txt", 'w') as report:
        report.write("Decision Tree with Breast Cancer Dataset\n\n")
        report.write("Optimal Depth: " + str(optimal_depth) + "\n\n")
        report.write("CV Results:\n\n" + str(results) + "\n\n")
        report.write("Feature Importance: " +
                     str(estimator.feature_importances_) + "\n\n")
        report.write("Score: " + str(score) + "\n\n")
        report.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        report.write("Cross Validation Accuracy: " +
                     str(cross_validation_accuracy) + "\n\n")
        report.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        report.write("Total Time Taken: " + strftime(total_time_taken))

    # Per-depth mean scores from the grid search drive the complexity plot.
    plot_dtree_depth_performance("Breast Cancer", max_depth,
                                 results['mean_train_score'],
                                 results['mean_test_score'])
    plt.savefig("dtree_breast_cancer_depth.png")
def boosting_balance_scale():
    """Tune and evaluate AdaBoost on the balance scale dataset.

    Grid-searches ``n_estimators`` over 1..99 using a shuffled 80/20 split,
    plots a learning curve for the best setting, writes a text report to
    ``results/adaboost_balance_scale.txt`` and saves two figures in the
    current working directory. Returns nothing.
    """
    train_features, train_labels, test_features, test_labels = (
        get_balance_data())
    # Flatten the label arrays once; every call below uses the 1-D form.
    train_targets = train_labels.ravel()
    test_targets = test_labels.ravel()

    start_time = datetime.now()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    num_estimators = range(1, 100, 1)
    # return_train_score=True is needed for cv_results_['mean_train_score'],
    # which is read at the end of this function; scikit-learn omits the
    # training scores by default.
    classifier = GridSearchCV(estimator=AdaBoostClassifier(),
                              cv=cross_validation,
                              param_grid=dict(n_estimators=num_estimators),
                              return_train_score=True)
    classifier.fit(train_features, train_targets)
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)

    best_n_estimators = classifier.best_estimator_.n_estimators
    estimator = AdaBoostClassifier(n_estimators=best_n_estimators)
    estimator.fit(train_features, train_targets)
    print("Score: ", classifier.score(test_features, test_targets))

    title = "Ada Boost (num_estimators = %s)" % str(best_n_estimators)
    plot_learning_curve(estimator, title, train_features, train_targets,
                        cv=cross_validation)
    plt.savefig("adaboost_balance_scale.png")

    train_accuracy = accuracy_score(train_targets,
                                    classifier.predict(train_features))
    # NOTE: passing the GridSearchCV object re-runs the whole grid search in
    # each of the 7 folds (nested cross-validation), which is expensive.
    cross_validation_accuracy = cross_val_score(
        classifier, train_features, train_targets, cv=7).mean()
    test_accuracy = accuracy_score(test_targets,
                                   classifier.predict(test_features))
    results = classifier.cv_results_

    with open("results/adaboost_balance_scale.txt", "w") as report:
        report.write("Adaboost with Balance Scale Dataset\n\n")
        # Each field ends with a blank line so the report sections do not
        # run into one another.
        report.write("Optimal Number of Estimators: " +
                     str(best_n_estimators) + "\n\n")
        report.write("Grid Scores:\n\n" + str(results) + "\n\n")
        report.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        report.write("Cross Validation Accuracy: " +
                     str(cross_validation_accuracy) + "\n\n")
        report.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        report.write("Total Time Taken: " + strftime(total_time_taken))

    # Per-setting mean scores from the grid search drive the complexity plot.
    plot_boosting_performance("Balance Scale", num_estimators,
                              results['mean_train_score'],
                              results['mean_test_score'])
    plt.savefig("adaboost_balance_num_estimators.png")
def knn_breast_cancer():
    """Tune and evaluate k-nearest-neighbours on the breast cancer dataset.

    Grid-searches ``n_neighbors`` over 1..20 using a shuffled 80/20 split,
    plots a learning curve for the best k, writes a text report to
    ``results/knn_breast_cancer.txt`` and saves two figures in the current
    working directory. Returns nothing.
    """
    train_features, train_labels, test_features, test_labels = (
        get_breast_cancer_data())
    # Flatten the label arrays once; every call below uses the 1-D form.
    train_targets = train_labels.ravel()
    test_targets = test_labels.ravel()

    start_time = datetime.now()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    k = range(1, 21, 1)
    # return_train_score=True is needed for cv_results_['mean_train_score'],
    # which is read at the end of this function; scikit-learn omits the
    # training scores by default.
    classifier = GridSearchCV(estimator=KNeighborsClassifier(),
                              cv=cross_validation,
                              param_grid=dict(n_neighbors=k),
                              return_train_score=True)
    classifier.fit(train_features, train_targets)
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)

    best_k = classifier.best_estimator_.n_neighbors
    estimator = KNeighborsClassifier(n_neighbors=best_k)
    estimator.fit(train_features, train_targets)
    print("Score: ", classifier.score(test_features, test_targets))

    title = 'KNN (k = %s)' % best_k
    plot_learning_curve(estimator, title, train_features, train_targets,
                        cv=cross_validation)
    plt.savefig("knn_breast_cancer.png")

    train_accuracy = accuracy_score(train_targets,
                                    classifier.predict(train_features))
    # NOTE: passing the GridSearchCV object re-runs the whole grid search in
    # each of the 7 folds (nested cross-validation), which is expensive.
    cross_validation_accuracy = cross_val_score(
        classifier, train_features, train_targets, cv=7).mean()
    test_accuracy = accuracy_score(test_targets,
                                   classifier.predict(test_features))
    results = classifier.cv_results_

    with open("results/knn_breast_cancer.txt", "w") as report:
        report.write("KNN with Breast Cancer Dataset\n\n")
        # The tuned parameter here is the neighbour count, not an ensemble
        # size; each field ends with a blank line so sections stay separate.
        report.write("Optimal Number of Neighbors: " + str(best_k) + "\n\n")
        report.write("Grid Scores:\n\n" + str(results) + "\n\n")
        report.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        report.write("Cross Validation Accuracy: " +
                     str(cross_validation_accuracy) + "\n\n")
        report.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        report.write("Total Time Taken: " + strftime(total_time_taken))

    # Per-k mean scores from the grid search drive the complexity plot.
    plot_knn_k("Breast Cancer", k, results['mean_train_score'],
               results['mean_test_score'])
    plt.savefig("knn_breast_cancer_k.png")
def run_analysis(estimator, X_train, Y_train, X_test, Y_test, dataset,
                 modelType, analysisRound, validationXLabel, param,
                 param_values, x_labels=None):
    """Fit ``estimator``, print test metrics and draw its diagnostic curves.

    The estimator is fitted on the training data, then the confusion matrix,
    accuracy, precision and recall on the test data are printed. Afterwards a
    learning curve and a validation curve over ``param`` / ``param_values``
    are produced through the project's ``plotting`` module.

    :param estimator: object exposing ``fit`` and ``predict``
    :param X_train: training features
    :param Y_train: training labels (flattened with ``ravel()`` before use)
    :param X_test: test features
    :param Y_test: test labels (flattened with ``ravel()`` before use)
    :param dataset: forwarded to the plotting helpers
    :param modelType: forwarded to the plotting helpers
    :param analysisRound: forwarded to the plotting helpers
    :param validationXLabel: forwarded to ``plot_validation_curve``
    :param param: name of the hyper-parameter varied in the validation curve
    :param param_values: values tried for ``param``
    :param x_labels: optional labels forwarded to ``plot_validation_curve``;
        defaults to an empty list
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if x_labels is None:
        x_labels = []

    train_targets = Y_train.ravel()
    test_targets = Y_test.ravel()

    # Train.
    estimator.fit(X_train, train_targets)

    # Test.
    predictions = estimator.predict(X_test)

    # Confusion matrix and headline metrics.
    print(metrics.confusion_matrix(test_targets, predictions))
    print("Accuracy: " +
          str(metrics.accuracy_score(test_targets, predictions)))
    print("Precision: " +
          str(metrics.precision_score(test_targets, predictions)))
    print("Recall: " + str(metrics.recall_score(test_targets, predictions)))

    # Learning Curve Analysis.
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=10)
    plotting.plot_learning_curve(estimator, X_train, train_targets, dataset,
                                 modelType, analysisRound, ylim=(0.7, 1.01),
                                 cv=cv, n_jobs=4)

    # Model Complexity / Validation Curve Analysis.
    plotting.plot_validation_curve(estimator, X_train, train_targets, dataset,
                                   modelType, analysisRound, validationXLabel,
                                   param, param_values, x_labels)
def main():
    """Train the model selected on the command line and log its results.

    Reads the dataset named by ``args.dataset`` from ``./data`` and the
    hyper-parameters named by ``args.params`` from ``./params``, fits the
    classifier chosen by ``args.model``, writes the learning log as a
    timestamped CSV under ``args.output`` (plus a plot when ``args.plot`` is
    set) and logs the training and testing scores.
    """
    dataset, _extension = os.path.splitext(args.dataset)
    input_path = os.path.abspath(os.path.join('./data', args.dataset))
    logger.info('Load {}'.format(input_path))

    split_options = {'test_size': 0.2, 'random_state': 1, 'cluster': 'kmeans'}
    X_train, X_test, y_train, y_test = data_loader.load(
        input_path, **split_options)
    logger.info('Split into train and test subsets: {}'.format(split_options))

    params_path = os.path.abspath(os.path.join('./params', args.params))
    with open(params_path) as params_file:
        # safe_load is the SafeLoader shorthand.
        hyperparameters = yaml.safe_load(params_file)
    logger.info('Load {}'.format(params_path))
    logger.info('Hyperparameters: {}'.format(hyperparameters))

    models = {
        'MLP': nn.MLPClassifier,
        'CNN': nn.CNNClassifier,
        'RNN': nn.RNNClassifier
    }
    model_class = models[args.model]
    clf = model_class(**hyperparameters)
    estimator = clf.__class__.__name__
    logger.info('Train {} on {}'.format(estimator, dataset))
    clf.fit(X_train, y_train)

    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)

    # One row per recorded training step: loss plus both score curves.
    csv_log = pd.DataFrame({
        'loss': clf.loss_curve_,
        'train_score': clf.training_scores_,
        'val_score': clf.validation_scores_
    })
    csv_name = time.strftime('%Y%m%d-%H%M%S.csv')
    csv_log_path = os.path.join(output_dir, csv_name)
    csv_log.to_csv(csv_log_path)
    logger.info('Save learning log to {}'.format(csv_log_path))

    if args.plot:
        plot_name = time.strftime('%Y%m%d-%H%M%S.png')
        plot_path = os.path.join(output_dir, plot_name)
        plot_title = '{} on {}'.format(estimator, dataset)
        plotting.plot_learning_curve(csv_log_path, plot_title, plot_path)
        logger.info('Save learning curves to {}'.format(plot_path))

    training_score = clf.score(X_train, y_train)
    logger.info('Training score: {}'.format(training_score))
    testing_score = clf.score(X_test, y_test)
    logger.info('Testing score: {}'.format(testing_score))
    logger.info('Done')
def svm_balance_scale():
    """Tune and evaluate an SVM on the balance scale dataset.

    Grid-searches the kernel (linear, sigmoid, rbf) using a shuffled 80/20
    split, plots a learning curve for the best kernel, writes a text report
    to ``results/svm_balance_scale.txt`` and saves two figures in the current
    working directory. Returns nothing.
    """
    train_features, train_labels, test_features, test_labels = (
        get_balance_data())
    # Flatten the label arrays once; every call below uses the 1-D form.
    train_targets = train_labels.ravel()
    test_targets = test_labels.ravel()

    start_time = datetime.now()
    cross_validation = ShuffleSplit(train_size=0.8, test_size=0.2)
    kernels = ["linear", "sigmoid", "rbf"]
    # return_train_score=True is needed for cv_results_['mean_train_score'],
    # which is read at the end of this function; scikit-learn omits the
    # training scores by default.
    classifier = GridSearchCV(estimator=SVC(),
                              cv=cross_validation,
                              param_grid=dict(kernel=kernels),
                              return_train_score=True)
    classifier.fit(train_features, train_targets)

    best_kernel = classifier.best_estimator_.kernel
    title = 'SVM (kernel = %s)' % best_kernel
    estimator = SVC(kernel=best_kernel)
    estimator.fit(train_features, train_targets)
    print("Score: ", classifier.score(test_features, test_targets))
    plot_learning_curve(estimator, title, train_features, train_targets,
                        cv=cross_validation)
    plt.savefig('svm_balance_scale.png')

    train_accuracy = accuracy_score(train_targets,
                                    classifier.predict(train_features))
    # NOTE: passing the GridSearchCV object re-runs the whole grid search in
    # each of the 7 folds (nested cross-validation), which is expensive.
    cross_validation_accuracy = cross_val_score(
        classifier, train_features, train_targets, cv=7).mean()
    test_accuracy = accuracy_score(test_targets,
                                   classifier.predict(test_features))
    # Unlike the sibling experiments, the timer here also covers plotting
    # and the evaluation above.
    end_time = datetime.now()
    total_time_taken = str(end_time - start_time)
    results = classifier.cv_results_

    with open("results/svm_balance_scale.txt", 'w') as report:
        report.write("SVM with Balance Scale Dataset\n\n")
        report.write("Best Kernel: " + str(best_kernel) + "\n\n")
        # Trailing blank line keeps the next field off the end of the dict.
        report.write("Grid Scores:\n\n" + str(results) + "\n\n")
        report.write("Training Accuracy: " + str(train_accuracy) + "\n\n")
        report.write("Cross Validation Accuracy: " +
                     str(cross_validation_accuracy) + "\n\n")
        report.write("Testing Accuracy: " + str(test_accuracy) + "\n\n")
        report.write("Total Time Taken: " + strftime(total_time_taken))

    # Per-kernel mean scores from the grid search drive the comparison plot.
    plot_svm_performance("Balance Scale", kernels,
                         results['mean_train_score'],
                         results['mean_test_score'])
    plt.savefig("svm_balance_scale_kernels.png")
def sarsa_lambda(l=0.9, max_episodes=1000, policy=policies.epsilon_greedy,
                 n_zero=100, gamma=1, plot_learning_curve=True,
                 multiproc=True):
    """
    Applies eligibility trace version of Sarsa to the game Easy21

    :param l: lambda parameter
    :param max_episodes: stop learning after this many episodes
    :param policy: exploration strategy to use
    :param n_zero: epsilon greedy constant (only applicable if epsilon greedy
        policy is used)
    :param gamma: discounting rate
    :param plot_learning_curve: whether to turn on plotting of learning curve
        for lambda = 0 and 1
    :param multiproc: whether to use multiprocessing when doing plots or not
        (warning! turn off if running multiple algorithms on mac or windows
        simultaneously)
    :return: value function after max_episodes
    """
    # (player, dealer, action) key
    value_function = defaultdict(float)
    # (player, dealer) key
    counter_state = defaultdict(int)
    # (player, dealer, action) key
    counter_state_action = defaultdict(int)
    # no. of wins to calculate the percentage of wins at the end
    wins = 0

    # The learning curve is only tracked for lambda = 0 and lambda = 1.
    track_curve = l in {0, 1} and plot_learning_curve
    if track_curve:
        learning_curve = []
        try:
            # NOTE(review): pickle executes code from the file it loads;
            # only keep this if the data file is trusted.
            with open("Data/MC_value_function.pickle", "rb") as handle:
                mc_values = pickle.load(handle)
        except Exception:
            # Best effort: recompute the Monte Carlo reference values if the
            # cached file is missing or unreadable.
            mc_values = monte_carlo(iterations=1000000)

    for episode in range(max_episodes):
        # current (player, dealer, action)
        eligibility_trace = defaultdict(float)
        reward = 0

        # initial state, action [SA..]
        state = environment.State()
        player_current = state.player_sum
        dealer_current = state.dealer_first_card
        epsilon = n_zero / float(
            n_zero + counter_state[(player_current, dealer_current)])
        action_current = policy(epsilon, value_function, state)

        while not state.terminal:
            current_key = (player_current, dealer_current, action_current)

            # update counts
            counter_state[(player_current, dealer_current)] += 1
            counter_state_action[current_key] += 1

            # take a step, get reward [..R..]
            state, reward = environment.step(state, action_current)
            if reward is None:
                reward = 0

            # follow up state, action [..SA]
            player_next = state.player_sum
            dealer_next = state.dealer_first_card
            epsilon = n_zero / float(
                n_zero + counter_state[(player_next, dealer_next)])
            action_next = policy(epsilon, value_function, state)

            # Nothing follows a terminal state, so its value must not be
            # bootstrapped from the table (a terminal state can share its
            # (player, dealer) key with a live one).
            if state.terminal:
                next_value = 0.0
            else:
                next_value = value_function[(player_next, dealer_next,
                                             action_next)]
            delta = reward + gamma * next_value - value_function[current_key]

            alpha = 1.0 / counter_state_action[current_key]
            eligibility_trace[current_key] += 1

            # update the values; only keys present in the trace can have a
            # non-zero trace, so the rest of the table is left untouched
            decay = gamma * l
            for key in eligibility_trace:
                value_function[key] += alpha * delta * eligibility_trace[key]
                eligibility_trace[key] *= decay

            player_current = player_next
            dealer_current = dealer_next
            action_current = action_next

        # use it later to calculate the percentage of wins
        if reward == 1:
            wins += 1

        # get the episode MSE for plotting learning curve
        if track_curve:
            learning_curve.append(
                (episode, utilities.calculate_mse(mc_values, value_function)))

    # plot learning curve
    if track_curve:
        if multiproc:
            # create a new process so computation can continue after plotting
            p = Process(target=plotting.plot_learning_curve,
                        args=(learning_curve, l))
            p.start()
        else:
            plotting.plot_learning_curve(learning_curve, l)

    # get the percentage of wins (0.0 when no episode was played)
    win_rate = float(wins) / max_episodes if max_episodes else 0.0
    print(win_rate)
    return value_function
def linear_function_approximation(l=0.9, max_episodes=1000,
                                  policy=policies.epsilon_greedy_lfa,
                                  n_zero=100, gamma=1,
                                  plot_learning_curve=True, multiproc=True):
    """
    Value function approximation using coarse coding

    :param l: lambda parameter
    :param gamma: discounting rate
    :param max_episodes: stop learning after this many episodes
    :param policy: exploration strategy to use
    :param n_zero: epsilon greedy constant; accepted for signature parity but
        not used here because epsilon is fixed at 0.05
    :param plot_learning_curve: whether to turn on plotting of learning curve
        for lambda = 0 and 1
    :param multiproc: whether to use multiprocessing when doing plots or not
        (warning! turn off if running multiple algorithms on mac or windows
        simultaneously)
    :return: value function after max_episodes
    """
    # weights vector for the state_action feature vector
    theta = np.random.random(36) * 0.2
    # random move probability
    epsilon = 0.05
    # step-size parameter
    alpha = 0.01

    # The learning curve is only tracked for lambda = 0 and lambda = 1.
    track_curve = l in {0, 1} and plot_learning_curve
    if track_curve:
        learning_curve = []
        try:
            # NOTE(review): pickle executes code from the file it loads;
            # only keep this if the data file is trusted.
            with open("Data/MC_value_function.pickle", "rb") as handle:
                mc_values = pickle.load(handle)
        except Exception:
            # Best effort: recompute the Monte Carlo reference values if the
            # cached file is missing or unreadable.
            mc_values = monte_carlo(iterations=1000000)

    def _tabulate(weights):
        # Expand the linear approximation into a (player, dealer, action)
        # table for every player sum 1..21 and dealer card 1..10.
        table = defaultdict(float)
        for player in range(1, 22):
            for dealer in range(1, 11):
                for action in [0, 1]:
                    s = environment.State(dealer, player)
                    phi = utilities.get_state_action_features(
                        utilities.get_state_features(s), action)
                    table[(s.player_sum, s.dealer_first_card,
                           action)] = phi.dot(weights)
        return table

    for episode in range(max_episodes):
        # key is state_action feature vector
        eligibility_trace = np.zeros(36)

        # initial state, action [SA..], and set of features
        state = environment.State()
        state_features_current = utilities.get_state_features(state)
        q_a_current, action_current = policy(epsilon, theta,
                                             state_features_current)
        features_current = utilities.get_state_action_features(
            state_features_current, action_current)

        while not state.terminal:
            # update eligibility trace (accumulating)
            eligibility_trace = np.add(eligibility_trace, features_current)

            # take a step, get reward [..R..]
            state, reward = environment.step(state, action_current)
            if reward is None:
                reward = 0

            # follow up state, action [..SA]
            state_features_next = utilities.get_state_features(state)
            q_a_next, action_next = policy(epsilon, theta,
                                           state_features_next)
            features_next = utilities.get_state_action_features(
                state_features_next, action_next)

            # Nothing follows a terminal state, so do not bootstrap from it.
            if state.terminal:
                target = reward
            else:
                target = reward + gamma * q_a_next
            delta = target - q_a_current

            # update weights
            theta = np.add(theta, alpha * delta * eligibility_trace)
            # update trace
            eligibility_trace *= gamma * l

            features_current = features_next
            action_current = action_next
            # Advance the current estimate as well; otherwise every TD error
            # in the episode is measured against the first step's value.
            q_a_current = q_a_next

        # The per-episode table is only needed for the learning-curve MSE.
        if track_curve:
            learning_curve.append(
                (episode,
                 utilities.calculate_mse(mc_values, _tabulate(theta))))

    # plot learning curves
    if track_curve:
        if multiproc:
            # create a new process so computation can continue after plotting
            p = Process(target=plotting.plot_learning_curve,
                        args=(learning_curve, l))
            p.start()
        else:
            plotting.plot_learning_curve(learning_curve, l)

    return _tabulate(theta)
def main(args):
    """Run the logistic-regression workflow selected by ``args``.

    Exactly one mode runs, checked in this order:

    * ``args.plot`` - show a learning curve for the classifier.
    * ``args.test_learning_rate`` - cross-validate a range of ``eta0`` values.
    * ``args.test_k_fold`` - cross-validate with several fold counts.
    * otherwise - fit (or load via ``args.load_model``) a model, print train
      and validation scores, then optionally predict (``args.predict`` /
      ``args.predict_proba``), report confidence statistics
      (``args.confidence``) and save the model (``args.save_model``).

    NOTE(review): the original source had lost its indentation; the optional
    steps are placed inside the final mode because they use variables that
    only exist there - confirm against the original layout.

    :param args: parsed command-line arguments providing the attributes
        listed above plus ``training_data``
    """
    path = os.getcwd()
    parent = os.path.dirname(path)
    training_data_path = os.path.join(parent, 'data', args.training_data)
    k = 5
    logreg = make_sgd_classifier()

    if args.plot:
        x, y, x_transformer = load_data(training_data_path)
        # Use n_jobs=-1 to make use of all cores.
        plt = plot_learning_curve(
            logreg, 'Logistic regression: Accuracy / Training example',
            x, y.argmax(axis=1), cv=k, n_jobs=-1)
        plt.show()
    elif args.test_learning_rate:
        x, y, x_transformer = load_data(training_data_path)
        eta0s = [0.00001, 0.00003, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03]
        for eta0 in eta0s:
            logreg_eta = make_sgd_classifier(eta0=eta0)
            results = cross_validate(logreg_eta, x, y.argmax(axis=1), cv=k,
                                     n_jobs=-1, return_train_score=True)
            train_score = numpy.mean(results['train_score'])
            test_score = numpy.mean(results['test_score'])
            print(
                f'Eta0 {eta0}; Train score {train_score}; Test score {test_score}'
            )
    elif args.test_k_fold:
        x, y, x_transformer = load_data(training_data_path)
        # A separate loop name keeps the default fold count ``k`` intact.
        for folds in [3, 5, 7, 10]:
            logreg = make_sgd_classifier()
            results = cross_validate(logreg, x, y.argmax(axis=1), cv=folds,
                                     n_jobs=-1, return_train_score=True)
            train_score = numpy.mean(results['train_score'])
            test_score = numpy.mean(results['test_score'])
            print(f'K {folds}; Train score {train_score}; Test score {test_score}')
    else:
        (train_x, train_y_non_one_hot, validation_x, validation_y,
         x_transformer) = load_data_and_split(training_data_path, k=k)
        train_y = one_hot(train_y_non_one_hot)

        if args.load_model:
            # NOTE(review): joblib.load unpickles the file, which can execute
            # arbitrary code; only load model files from trusted sources.
            logreg = joblib.load(args.load_model)
        else:
            logreg.fit(train_x, train_y.argmax(axis=1))

        print('Train score: {}'.format(
            logreg.score(train_x, train_y.argmax(axis=1))))
        print('Validation score: {}'.format(
            logreg.score(validation_x, validation_y.argmax(axis=1))))

        if args.predict or args.predict_proba:
            predict_data, y, timestamps = load_prediction_data(
                args.training_data, args.predict, x_transformer)
            if args.predict:
                predictions = logreg.predict(predict_data)
                results = pd.DataFrame(data={
                    'label': y,
                    'prediction': predictions
                }, index=timestamps)
                print(results)
            if args.predict_proba:
                predictions = logreg.predict_proba(predict_data)
                results = pd.DataFrame(data=predictions, index=timestamps)
                print(results)

        if args.confidence:
            probabilities = logreg.predict_proba(train_x)
            predictions = logreg.predict(train_x)
            # One row per training sample: four class probabilities, the
            # predicted class and the true label.
            probas_predictions_labels = numpy.concatenate(
                (probabilities, predictions.reshape(-1, 1),
                 train_y_non_one_hot),
                axis=1)
            df = pd.DataFrame(
                probas_predictions_labels,
                columns=['a0', 'a1', 'a2', 'a3', 'prediction', 'label'])
            correct_predictions = df.loc[df['prediction'] == df['label']]
            highest_probas_correct = correct_predictions[[
                'a0', 'a1', 'a2', 'a3'
            ]].max(axis=1)
            highest_probas_correct_avg = numpy.average(highest_probas_correct)
            highest_probas_correct_std = numpy.std(highest_probas_correct)
            highest_probas = numpy.max(probabilities, axis=1)
            highest_probas_avg = numpy.average(highest_probas)
            highest_probas_std = numpy.std(highest_probas)
            print('Highest avg. probability:', highest_probas_avg)
            print('Highest probability std:', highest_probas_std)
            print('Highest correct probabilities avg:',
                  highest_probas_correct_avg)
            print('Highest correct probabilities std:',
                  highest_probas_correct_std)

        if args.save_model:
            model_directory = os.path.join(parent, 'trained_models')
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists test.
            os.makedirs(model_directory, exist_ok=True)
            joblib.dump(
                logreg,
                os.path.join(model_directory, args.save_model + '.joblib'))
cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring) results.append(cv_results) names.append(name) msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()) print(msg) # Now for graphs title = "Learning Curves (" + name + ")" print("Creating Learning curves for %s" % name) # Cross validation with 100 iterations to get smoother mean test and train # score curves, each time with 20% data randomly selected as a validation set. cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0) # Get plot for this model and data. plot_learning_curve(model, title, X, y, ylim=(0.3, 1.01), cv=cv, n_jobs=1) plt.show() # # # # ## From the models above, we choose DecisionTreeClassifierself. # estimator = DecisionTreeClassifier(random_state=0) # estimator.fit(X_train, Y_train) # # # The decision estimator has an attribute called tree_ which stores the entire # # tree structure and allows access to low level attributes. The binary tree # # tree_ is represented as a number of parallel arrays. The i-th element of each # # array holds information about the node `i`. Node 0 is the tree's root. NOTE: # # Some of the arrays only apply to either leaves or split nodes, resp. In this # # case the values of nodes of the other type are arbitrary!
r = env.act(env.get_action_set()[a]) next_s = env.get_current_state() next_a = utils.epsilon_greedy(q_values[next_s]) td_error = r + gamma * q_values[next_s][next_a] - q_values[s][a] q_values[s][a] = q_values[s][a] + step_size * td_error s = next_s a = next_a num_steps += 1 env.reset() num_steps_episode[seed].append(num_steps) # Let me plot the max q-values for this seed: max_values = [] for i in range(num_states): max_values.append(np.max(q_values[i])) plotting.plot_basis_function(args, num_rows, num_cols, np.array(max_values), 'max_q_seed_' + str(seed)) # Let me plot the final policy for this seed: policy = [] for i in range(num_states): policy.append(np.argmax(q_values[i])) plotting.plot_policy(env, args, num_rows, num_cols, policy, 'policy_seed_' + str(seed)) # Finally, I'll just plot the results to provide examples on how to call the functions you might be interested at: plotting.plot_learning_curve(num_steps_episode, args.output)
def sarsa_lambda(l=0.9, max_episodes=1000, policy=policies.epsilon_greedy,
                 n_zero=100, gamma=1, plot_learning_curve=True,
                 multiproc=True):
    """
    Applies eligibility trace version of Sarsa to the game Easy21

    :param l: lambda parameter
    :param max_episodes: stop learning after this many episodes
    :param policy: exploration strategy to use
    :param n_zero: epsilon greedy constant (only applicable if epsilon greedy
        policy is used)
    :param gamma: discounting rate
    :param plot_learning_curve: whether to turn on plotting of learning curve
        for lambda = 0 and 1
    :param multiproc: whether to use multiprocessing when doing plots or not
        (warning! turn off if running multiple algorithms on mac or windows
        simultaneously)
    :return: value function after max_episodes
    """
    # (player, dealer, action) key
    value_function = defaultdict(float)
    # (player, dealer) key
    counter_state = defaultdict(int)
    # (player, dealer, action) key
    counter_state_action = defaultdict(int)
    # no. of wins to calculate the percentage of wins at the end
    wins = 0

    # The learning curve is only tracked for lambda = 0 and lambda = 1.
    track_curve = l in {0, 1} and plot_learning_curve
    if track_curve:
        learning_curve = []
        try:
            # NOTE(review): pickle executes code from the file it loads;
            # only keep this if the data file is trusted.
            with open("Data/MC_value_function.pickle", "rb") as handle:
                mc_values = pickle.load(handle)
        except Exception:
            # Best effort: recompute the Monte Carlo reference values if the
            # cached file is missing or unreadable.
            mc_values = monte_carlo(iterations=1000000)

    for episode in range(max_episodes):
        # current (player, dealer, action)
        eligibility_trace = defaultdict(float)
        reward = 0

        # initial state, action [SA..]
        state = environment.State()
        player_current = state.player_sum
        dealer_current = state.dealer_first_card
        epsilon = n_zero / float(
            n_zero + counter_state[(player_current, dealer_current)])
        action_current = policy(epsilon, value_function, state)

        while not state.terminal:
            current_key = (player_current, dealer_current, action_current)

            # update counts
            counter_state[(player_current, dealer_current)] += 1
            counter_state_action[current_key] += 1

            # take a step, get reward [..R..]
            state, reward = environment.step(state, action_current)
            if reward is None:
                reward = 0

            # follow up state, action [..SA]
            player_next = state.player_sum
            dealer_next = state.dealer_first_card
            epsilon = n_zero / float(
                n_zero + counter_state[(player_next, dealer_next)])
            action_next = policy(epsilon, value_function, state)

            # Nothing follows a terminal state, so its value must not be
            # bootstrapped from the table (a terminal state can share its
            # (player, dealer) key with a live one).
            if state.terminal:
                next_value = 0.0
            else:
                next_value = value_function[(player_next, dealer_next,
                                             action_next)]
            delta = reward + gamma * next_value - value_function[current_key]

            alpha = 1.0 / counter_state_action[current_key]
            eligibility_trace[current_key] += 1

            # update the values; only keys present in the trace can have a
            # non-zero trace, so the rest of the table is left untouched
            decay = gamma * l
            for key in eligibility_trace:
                value_function[key] += alpha * delta * eligibility_trace[key]
                eligibility_trace[key] *= decay

            player_current = player_next
            dealer_current = dealer_next
            action_current = action_next

        # use it later to calculate the percentage of wins
        if reward == 1:
            wins += 1

        # get the episode MSE for plotting learning curve
        if track_curve:
            learning_curve.append(
                (episode, utilities.calculate_mse(mc_values, value_function)))

    # plot learning curve
    if track_curve:
        if multiproc:
            # create a new process so computation can continue after plotting
            p = Process(target=plotting.plot_learning_curve,
                        args=(learning_curve, l))
            p.start()
        else:
            plotting.plot_learning_curve(learning_curve, l)

    # get the percentage of wins (0.0 when no episode was played)
    win_rate = float(wins) / max_episodes if max_episodes else 0.0
    print(win_rate)
    return value_function
def linear_function_approximation(l=0.9, max_episodes=1000,
                                  policy=policies.epsilon_greedy_lfa,
                                  n_zero=100, gamma=1,
                                  plot_learning_curve=True, multiproc=True):
    """
    Value function approximation using coarse coding

    :param l: lambda parameter
    :param gamma: discounting rate
    :param max_episodes: stop learning after this many episodes
    :param policy: exploration strategy to use
    :param n_zero: epsilon greedy constant; accepted for signature parity but
        not used here because epsilon is fixed at 0.05
    :param plot_learning_curve: whether to turn on plotting of learning curve
        for lambda = 0 and 1
    :param multiproc: whether to use multiprocessing when doing plots or not
        (warning! turn off if running multiple algorithms on mac or windows
        simultaneously)
    :return: value function after max_episodes
    """
    # weights vector for the state_action feature vector
    theta = np.random.random(36) * 0.2
    # random move probability
    epsilon = 0.05
    # step-size parameter
    alpha = 0.01

    # The learning curve is only tracked for lambda = 0 and lambda = 1.
    track_curve = l in {0, 1} and plot_learning_curve
    if track_curve:
        learning_curve = []
        try:
            # NOTE(review): pickle executes code from the file it loads;
            # only keep this if the data file is trusted.
            with open("Data/MC_value_function.pickle", "rb") as handle:
                mc_values = pickle.load(handle)
        except Exception:
            # Best effort: recompute the Monte Carlo reference values if the
            # cached file is missing or unreadable.
            mc_values = monte_carlo(iterations=1000000)

    def _tabulate(weights):
        # Expand the linear approximation into a (player, dealer, action)
        # table for every player sum 1..21 and dealer card 1..10.
        table = defaultdict(float)
        for player in range(1, 22):
            for dealer in range(1, 11):
                for action in [0, 1]:
                    s = environment.State(dealer, player)
                    phi = utilities.get_state_action_features(
                        utilities.get_state_features(s), action)
                    table[(s.player_sum, s.dealer_first_card,
                           action)] = phi.dot(weights)
        return table

    for episode in range(max_episodes):
        # key is state_action feature vector
        eligibility_trace = np.zeros(36)

        # initial state, action [SA..], and set of features
        state = environment.State()
        state_features_current = utilities.get_state_features(state)
        q_a_current, action_current = policy(epsilon, theta,
                                             state_features_current)
        features_current = utilities.get_state_action_features(
            state_features_current, action_current)

        while not state.terminal:
            # update eligibility trace (accumulating)
            eligibility_trace = np.add(eligibility_trace, features_current)

            # take a step, get reward [..R..]
            state, reward = environment.step(state, action_current)
            if reward is None:
                reward = 0

            # follow up state, action [..SA]
            state_features_next = utilities.get_state_features(state)
            q_a_next, action_next = policy(epsilon, theta,
                                           state_features_next)
            features_next = utilities.get_state_action_features(
                state_features_next, action_next)

            # Nothing follows a terminal state, so do not bootstrap from it.
            if state.terminal:
                target = reward
            else:
                target = reward + gamma * q_a_next
            delta = target - q_a_current

            # update weights
            theta = np.add(theta, alpha * delta * eligibility_trace)
            # update trace
            eligibility_trace *= gamma * l

            features_current = features_next
            action_current = action_next
            # Advance the current estimate as well; otherwise every TD error
            # in the episode is measured against the first step's value.
            q_a_current = q_a_next

        # The per-episode table is only needed for the learning-curve MSE.
        if track_curve:
            learning_curve.append(
                (episode,
                 utilities.calculate_mse(mc_values, _tabulate(theta))))

    # plot learning curves
    if track_curve:
        if multiproc:
            # create a new process so computation can continue after plotting
            p = Process(target=plotting.plot_learning_curve,
                        args=(learning_curve, l))
            p.start()
        else:
            plotting.plot_learning_curve(learning_curve, l)

    return _tabulate(theta)
# Plot predicted vs. acutal on the same plot x = y_test.index title = f'Predicted vs. Actual Days Until Watering for {plant_name} Plant' fig_1 = plot_time_series(x, y_pred, title, 'Predicted days Until Watering', color='red') plt.scatter(x, y_test, figure=fig_1, label='Actual days until watering', color='black', s=1) plt.legend() # Plot learning curve tscv = TimeSeriesSplit(n_splits=10) plot_learning_curve(reg, X, y, cv=tscv, train_sizes=np.linspace(0.1, 1.0, 10), scoring='neg_mean_squared_error') # TODO - Forward-looking sunlight prediction - Connect to weather prediction API (sunny/cloudy) for light predictions? Correlate to light detected in training data plt.show()
def basicResults(clfObj, trgX, trgY, tstX, tstY, params, clf_type=None,
                 dataset=None, feature_names=None, scorer='accuracy',
                 complexity_curve=False, complexity_params=None, clf_name=""):
    """Grid-search ``clfObj`` and write its standard result artefacts.

    Runs a 5-fold grid search over ``params``, then writes under ``output``:
    the full grid-search table, a confusion-matrix image for the test set, a
    line appended to ``test results.csv``, learning-curve CSVs and a
    learning-curve image. Optionally draws a complexity curve.

    :param clfObj: estimator to tune
    :param trgX: training features
    :param trgY: training labels
    :param tstX: test features
    :param tstY: test labels
    :param params: parameter grid passed to ``GridSearchCV``
    :param clf_type: classifier tag used in file names and titles (required)
    :param dataset: dataset tag used in file names and titles (required)
    :param feature_names: currently unused
    :param scorer: scoring passed to the grid search and learning curve
    :param complexity_curve: when true, also call ``make_complexity_curve``
    :param complexity_params: mapping with ``name``, ``display_name`` and
        ``values`` keys; read only when ``complexity_curve`` is true
    :param clf_name: forwarded to ``make_complexity_curve``
    :return: the fitted ``GridSearchCV`` object
    :raises RuntimeError: if ``clf_type`` or ``dataset`` is None
    """
    if clf_type is None or dataset is None:
        # Same exception type the former bare ``raise`` produced, but with a
        # message that says what is actually wrong.
        raise RuntimeError(
            "basicResults requires both clf_type and dataset "
            "(got clf_type={!r}, dataset={!r})".format(clf_type, dataset))
    np.random.seed(55)

    print("Starting grid search--------")
    cv = ms.GridSearchCV(clfObj, n_jobs=1, param_grid=params, refit=True,
                         verbose=10, cv=5, scoring=scorer)
    cv.fit(trgX, trgY)
    # export_decision_tree(cv, feature_names, dataset)
    print("Ended grid search--------")

    regTable = pd.DataFrame(cv.cv_results_)
    regTable.to_csv('./output/{}_{}_reg.csv'.format(clf_type, dataset),
                    index=False)
    test_score = cv.score(tstX, tstY)
    test_y_predicted = cv.predict(tstX)

    # PLOT Confusion Matrix
    OUTPUT_DIRECTORY = "output"
    cnf_matrix = confusion_matrix(tstY, test_y_predicted)
    cm_plot = plot_confusion_matrix(
        cnf_matrix,
        title='Confusion Matrix: {} - {}'.format(clf_type, dataset))
    cm_plot.savefig('{}/images/{}_{}_CM.png'.format(OUTPUT_DIRECTORY,
                                                    clf_type, dataset),
                    format='png', dpi=150, bbox_inches='tight')

    # Append so that results of successive runs accumulate in one file.
    with open('./output/test results.csv', 'a') as f:
        f.write('{},{},{},{}\n'.format(clf_type, dataset, test_score,
                                       cv.best_params_))

    # Plot Learning Curve; ``curve`` is (train sizes, train scores, test
    # scores) as returned by learning_curve.
    curve = ms.learning_curve(cv.best_estimator_, trgX, trgY, cv=3,
                              train_sizes=np.linspace(0.2, 1.0, 10),
                              verbose=10, scoring=scorer)
    curve_train_scores = pd.DataFrame(index=curve[0], data=curve[1])
    curve_test_scores = pd.DataFrame(index=curve[0], data=curve[2])
    curve_train_scores.to_csv('./output/{}_{}_LC_train.csv'.format(
        clf_type, dataset))
    curve_test_scores.to_csv('./output/{}_{}_LC_test.csv'.format(
        clf_type, dataset))
    lc_plot = plot_learning_curve(
        'Learning Curve: {} - {}'.format(clf_type, dataset),
        curve[0], curve[1], curve[2], y_label=scorer)
    lc_plot.savefig('{}/images/{}_{}_LC.png'.format(OUTPUT_DIRECTORY,
                                                    clf_type, dataset),
                    format='png', dpi=150)

    if complexity_curve:
        make_complexity_curve(trgX, trgY, complexity_params['name'],
                              complexity_params['display_name'],
                              complexity_params['values'], clfObj,
                              clf_name=clf_name, dataset=dataset,
                              dataset_readable_name=dataset)
        print("Drew complexity curve")
    return cv
def iterationLC(clfObj, trgX, trgY, tstX, tstY, params, clf_type=None,
                dataset=None, dataset_readable_name=None,
                balanced_dataset=False, x_scale='linear', seed=55, threads=1,
                scorer='accuracy'):
    """Grid-search ``clfObj`` and chart accuracy against one parameter.

    Runs a 5-fold grid search over ``params`` and saves its table, then, for
    each value of the first parameter in ``params``, refits ``clfObj`` on the
    training data and records balanced accuracy on the training and test
    sets. The per-value scores are written to CSV and plotted.

    :param clfObj: estimator to tune; it is mutated via ``set_params``/``fit``
    :param trgX: training features
    :param trgY: training labels
    :param tstX: test features
    :param tstY: test labels
    :param params: parameter grid; only its first key is swept for the plot
    :param clf_type: classifier tag used in file names and titles (required)
    :param dataset: dataset tag used in file names (required)
    :param dataset_readable_name: name shown in the plot title; defaults to
        ``dataset``
    :param balanced_dataset: currently unused
    :param x_scale: x-axis scale forwarded to ``plot_learning_curve``
    :param seed: currently unused (the RNG is seeded with a fixed 50)
    :param threads: currently unused (the grid search runs with ``n_jobs=1``)
    :param scorer: scoring passed to the grid search and used as y-axis label
    :return: the fitted ``GridSearchCV`` object
    :raises RuntimeError: if ``clf_type`` or ``dataset`` is None
    """
    if not dataset_readable_name:
        dataset_readable_name = dataset
    # NOTE(review): ``seed`` is ignored here; switching to it would change
    # the default seed from 50 to 55, so it is left for a deliberate decision.
    np.random.seed(50)
    if clf_type is None or dataset is None:
        # Same exception type the former bare ``raise`` produced, but with a
        # message that says what is actually wrong.
        raise RuntimeError(
            "iterationLC requires both clf_type and dataset "
            "(got clf_type={!r}, dataset={!r})".format(clf_type, dataset))

    cv = ms.GridSearchCV(clfObj, n_jobs=1, param_grid=params, refit=True,
                         verbose=10, cv=5, scoring=scorer)
    cv.fit(trgX, trgY)
    regTable = pd.DataFrame(cv.cv_results_)
    regTable.to_csv('./output/ITER_base_{}_{}.csv'.format(clf_type, dataset),
                    index=False)

    d = defaultdict(list)
    name = list(params.keys())[0]
    param_column = 'param_{}'.format(name)
    for value in list(params.values())[0]:
        d[param_column].append(value)
        clfObj.set_params(**{name: value})
        # Fit once and score both sets with the same model, so the train and
        # test figures describe one fit rather than two separate ones.
        clfObj.fit(trgX, trgY)
        d['train acc'].append(balanced_accuracy(trgY, clfObj.predict(trgX)))
        d['test acc'].append(balanced_accuracy(tstY, clfObj.predict(tstX)))
        print(value)

    d = pd.DataFrame(d)
    d.to_csv('./output/ITERtestSET_{}_{}.csv'.format(clf_type, dataset),
             index=False)
    # NOTE(review): the y-axis is labelled with ``scorer`` although the
    # plotted values are balanced accuracy.
    plt = plot_learning_curve(
        '{} - {} ({})'.format(clf_type, dataset_readable_name, name),
        d[param_column], d['train acc'], d['test acc'],
        multiple_runs=False, x_scale=x_scale, x_label='Value',
        y_label=scorer)
    plt.savefig('{}/images/{}_{}_ITER_LC.png'.format(OUTPUT_DIRECTORY,
                                                     clf_type, dataset),
                format='png', dpi=150)
    return cv