from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score, precision_score, f1_score


def fit_KFold(in_dim, no_classes, model_fn, X, y, X_val, y_val, K=5):
    folds = list(
        StratifiedKFold(n_splits=K, shuffle=True, random_state=1).split(X, y))
    for i, (idx_tr, idx_val) in enumerate(folds):
        print(f'\nFold: {i}')
        data_tr = (X[idx_tr], y[idx_tr])
        data_val = (X[idx_val], y[idx_val])
        name = f'models/final_model_fold_{i}.h5'
        callbacks = get_callbacks(name, data_tr, data_val)
        model = model_fn(in_dim, no_classes)
        model, hist = fit_model(model, data_tr[0], data_tr[1],
                                data_val[0], data_val[1],
                                callbacks=callbacks, epochs=30)
        plot_learning(hist.history, i)
        auc = evaluate_model(model, X_val, y_val)
        print(f'AUC score for fold {i}: {auc}')
        preds = model.predict(X_val)
        # Threshold the predicted probabilities; use a separate index so the
        # fold counter `i` is not shadowed.
        for j in range(len(preds)):
            preds[j][0] = round(preds[j][0])
        print(recall_score(y_val, preds))
        print(precision_score(y_val, preds))
        print(f1_score(y_val, preds))

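# A hypothetical invocation of fit_KFold, for illustration only; build_model, X_train,
# y_train, X_test and y_test are placeholder names that do not appear in the code
# above, and model_fn is assumed to return a compiled binary classifier:
#
#     fit_KFold(in_dim=X_train.shape[1], no_classes=1, model_fn=build_model,
#               X=X_train, y=y_train, X_val=X_test, y_val=y_test, K=5)
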
        ep_step += 1

    scores.append(score)
    steps.append(episode)
    epsilons.append(agent.eps)
    average_score = np.mean(scores[-40:])
    print('Episode:', episode, ' Score: %.1f' % score,
          ' Average: %.1f' % average_score,
          ' Explore probability: %.2f' % agent.eps,
          ' Best average: %.2f' % best_score)

    if average_score > best_score:
        print('Best average score %.1f!' % average_score)
        best_score = average_score
        agent.save()
        plot_learning(steps, scores, epsilons, "pixel_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("pixel_log.csv", sep=';', index=False)

    if episode % 50 == 0:
        plot_learning(steps, scores, epsilons, "pixel_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("pixel_log.csv", sep=';', index=False)

              input_dims=env.observation_space.shape,
              n_actions=env.action_space.n,
              mem_size=1000000,
              batch_size=64,
              epsilon_end=0.01)

scores = []
eps_history = []

for i in range(n_games):
    done = False
    score = 0
    observation = env.reset()
    while not done:
        # env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        observation = observation_
        agent.learn()
    eps_history.append(agent.epsilon)
    scores.append(score)
    avg_score = np.mean(scores[-100:])
    print('episode: ', i, 'score %.2f' % score,
          'average_score %.2f' % avg_score,
          'epsilon %.2f' % agent.epsilon)

filename = 'lunar_lander.png'
x = [i + 1 for i in range(n_games)]
plot_learning(x, scores, eps_history, filename)

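# plot_learning is not defined in this snippet. The following is a minimal sketch of
# what a helper with this (x, scores, epsilons, filename) signature might look like,
# assuming it draws a running-average score and the exploration rate on twin y-axes
# and writes the figure to `filename`; the actual helper used above may differ.
import matplotlib.pyplot as plt
import numpy as np


def plot_learning(x, scores, epsilons, filename, window=100):
    # Running average of the last `window` scores at each episode.
    running_avg = [np.mean(scores[max(0, t - window + 1):t + 1])
                   for t in range(len(scores))]
    fig, ax1 = plt.subplots()
    ax1.plot(x, running_avg, color='C0')
    ax1.set_xlabel('episode')
    ax1.set_ylabel('average score', color='C0')
    ax2 = ax1.twinx()  # second y-axis for the exploration rate
    ax2.plot(x, epsilons, color='C1')
    ax2.set_ylabel('epsilon', color='C1')
    fig.savefig(filename)
    plt.close(fig)
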
def learn(q_table, worldId=0, mode='train', learning_rate=0.001, gamma=0.9,
          epsilon=0.9, good_term_states=[], bad_term_states=[], epoch=0,
          obstacles=[], run_num=0, verbose=True):
    '''
    ~MAIN LEARNING FUNCTION~

    Takes in:
        - the Q-table data structure (NumPy 3-dimensional array)
        - worldId (for the API and plotting)
        - mode ('train' or 'exploit')
        - learning_rate (affects the Q-table update)
        - gamma (weighting of the rewards)
        - epsilon (determines the amount of random exploration the agent does)
        - good_term_states
        - bad_term_states
        - epoch
        - run number
        - verbosity

    Returns:
        q_table [NumPy array], good_term_states [list], bad_term_states [list],
        obstacles [list]
    '''
    # Create the API instance.
    a = api.API(worldId=worldId)
    w_res = a.enter_world()
    if verbose:
        print("w_res: ", w_res)

    # Track whether a terminal state has been reached, and of which kind.
    terminal_state = False
    good = False

    # Accumulate the rewards so far for plotting reward over steps.
    rewards_acquired = []

    # Find out where we are.
    loc_response = a.locate_me()

    # Keep a list of everywhere we've been for the visualization.
    visited = []
    if verbose:
        print("loc_response", loc_response)

    # An OK response looks like {"code":"OK","world":"0","state":"0:2"}.
    if loc_response["code"] != "OK":
        print(f"something broke on locate_me call\nresponse looks like: {loc_response}")
        return -1

    # Convert the JSON into a tuple (x, y).
    location = int(loc_response["state"].split(':')[0]), int(
        loc_response["state"].split(':')[1])

    # Set up the figure for visualization.
    pyplot.figure(1, figsize=(10, 10))
    curr_board = [[float('-inf')] * 40 for temp in range(40)]

    # Keep track of where we've been for the visualization.
    visited.append(location)

    while True:
        # ////////////////// CODE FOR VISUALIZATION
        curr_board[location[1]][location[0]] = 1
        for i in range(len(curr_board)):
            for j in range(len(curr_board)):
                if curr_board[i][j] != 0:
                    curr_board[i][j] -= .1
        # Iterate over a copy so elements can be removed safely.
        for obstacle in list(obstacles):
            if obstacle in visited:
                obstacles.remove(obstacle)
        v.update_grid(curr_board, good_term_states, bad_term_states, obstacles,
                      run_num, epoch, worldId, location, verbose)
        # //////////////// END CODE FOR VISUALIZATION

        # From the Q-table, get the index of the best movement option based on
        # our current state in the world.
        if mode == 'train':
            # Use an epsilon-greedy approach to randomly explore or exploit.
            if np.random.uniform() < epsilon:
                unexplored = np.where(
                    q_table[location[0]][location[1]].astype(int) == 0)[0]
                explored = np.where(
                    q_table[location[0]][location[1]].astype(int) != 0)[0]
                if unexplored.size != 0:
                    move_num = int(np.random.choice(unexplored))
                else:
                    move_num = int(np.random.choice(explored))
            else:
                move_num = np.argmax(q_table[location[0]][location[1]])
        else:
            # Mode is 'exploit': use what we already have in the Q-table to
            # decide on our moves.
            move_num = np.argmax(q_table[location[0]][location[1]])

        # Make the move - transition into a new state.
        move_response = a.make_move(move=num_to_move(move_num), worldId=str(worldId))
        if verbose:
            print("move_response", move_response)

        # An OK response looks like {"code":"OK","worldId":0,"runId":"931",
        # "reward":-0.1000000000,"scoreIncrement":-0.0800000000,"newState":{"x":"0","y":3}}.
        if move_response["code"] != "OK":
            # Handle the unexpected: retry the move until it succeeds.
            print(f"something broke on make_move call\nresponse looks like: {move_response}")
            move_failed = True
            while move_failed:
                move_response = a.make_move(move=num_to_move(move_num),
                                            worldId=str(worldId))
                print("\n\ntrying move again!!\n\n")
                if move_response["code"] == 'OK':
                    move_failed = False

        # Check that we're not in a terminal state and, if not, convert the new
        # location JSON into a tuple.
        if move_response["newState"] is not None:
            # We're now in new_loc, a tuple of where we are according to the API.
            # KEEP IN MIND the movement of our agent is apparently STOCHASTIC.
            new_loc = int(move_response["newState"]["x"]), int(
                move_response["newState"]["y"])  # tuple (x, y)

            # Keep track of whether we hit any obstacles: convert the move we
            # tried to make into the location where we expected to end up
            # (expected_loc).
            expected_loc = list(location)
            recent_move = num_to_move(move_num)
            if recent_move == "N":
                expected_loc[1] += 1
            elif recent_move == "S":
                expected_loc[1] -= 1
            elif recent_move == "E":
                expected_loc[0] += 1
            elif recent_move == "W":
                expected_loc[0] -= 1
            expected_loc = tuple(expected_loc)

            if verbose:
                print(f"New Loc: {new_loc} (where we actually are now):")
                print(f"Expected Loc: {expected_loc} (where we thought we were going to be):")

            if mode == "train":
                obstacles.append(expected_loc)

            # Continue to track where we have been.
            visited.append(new_loc)

            # If we placed an obstacle there in the visualization, remove it
            # (iterate over a copy so removal is safe).
            for obstacle in list(obstacles):
                if obstacle in visited:
                    obstacles.remove(obstacle)
        else:
            # We hit a terminal state.
            terminal_state = True
            print("\n\n--------------------------\n"
                  "TERMINAL STATE ENCOUNTERED\n"
                  "--------------------------\n\n")

        # Get the reward for the most recent move we made.
        reward = float(move_response["reward"])
        # Add the reward to the plot data.
        rewards_acquired.append(reward)

        # If we are training the model, update the Q-table for the state we were
        # in before, using the Bellman (Q-learning) update.
        if mode == "train":
            update_q_table(location, q_table, reward, gamma, new_loc,
                           learning_rate, move_num)

        # Update our current location variable to our now current location.
        location = new_loc

        # If we are in a terminal state, collect the information for our
        # visualization and end the current training epoch.
        if terminal_state:
            print(f"Terminal State REWARD: {reward}")
            if reward > 0:
                # We hit a positive reward, so track it as a good terminal state.
                good = True
            if location not in good_term_states and location not in bad_term_states:
                # Update our accounting of good and bad terminal states for the
                # visualization.
                if good:
                    good_term_states.append(location)
                else:
                    bad_term_states.append(location)
            # Update our visualization one last time before moving on to the
            # next epoch.
            v.update_grid(curr_board, good_term_states, bad_term_states, obstacles,
                          run_num, epoch, worldId, location, verbose)
            break

    # Possibly not needed, but this separates out the plot.
    pyplot.figure(2, figsize=(5, 5))

    # Cumulative average, for plotting reward by step over time.
    cumulative_average = np.cumsum(rewards_acquired) / (
        np.arange(len(rewards_acquired)) + 1)

    # Plot the reward over each step of the agent.
    utils.plot_learning(worldId, epoch, cumulative_average, run_num)

    return q_table, good_term_states, bad_term_states, obstacles

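# update_q_table is called above but not shown here. A minimal sketch of what it might
# look like, assuming the standard Q-learning (Bellman) update and the same
# q_table[x][y][action] indexing used in learn(); the real helper may differ.
def update_q_table(location, q_table, reward, gamma, new_loc, learning_rate, move_num):
    # Current estimate for the state-action pair we just left.
    old_value = q_table[location[0]][location[1]][move_num]
    # Best value attainable from the state we landed in.
    best_next = np.max(q_table[new_loc[0]][new_loc[1]])
    # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
    q_table[location[0]][location[1]][move_num] = old_value + learning_rate * (
        reward + gamma * best_next - old_value)
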
        ep_step += 1

    scores.append(score)
    steps.append(episode)
    epsilons.append(agent.eps)
    average_score = np.mean(scores[-40:])
    print('Episode:', episode, ' Score: %.1f' % score,
          ' Average: %.1f' % average_score,
          ' Explore probability: %.2f' % agent.eps,
          ' Best average: %.2f' % best_score)

    if average_score > best_score:
        print('Best average score %.1f!' % average_score)
        best_score = average_score
        agent.save()
        plot_learning(steps, scores, epsilons, "ram_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("ram_log.csv", sep=';', index=False)

    if episode % 50 == 0:
        plot_learning(steps, scores, epsilons, "ram_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("ram_log.csv", sep=';', index=False)

def train_and_evaluate(model, optimizer, loss_fn, train_dataloader, val_dataloader,
                       metrics, params, model_dir, logger, restore_file=None):
    """
    Train the model and evaluate it on a validation dataset using the parameters
    specified in `params`.

    :param model: (torch.nn.Module) the model to be trained
    :param optimizer: (torch.optim) optimizer for the model parameters
    :param loss_fn: (nn.MSELoss or nn.CrossEntropyLoss) loss function
    :param train_dataloader: (torch.utils.data.DataLoader) training data
    :param val_dataloader: (torch.utils.data.DataLoader) validation data
    :param metrics: (dict) metrics to be computed
    :param params: (dict) model parameters
    :param model_dir: (str) directory to output model performance
    :param logger: logger used to record training progress
    :param restore_file: (str) path to a checkpoint from which to reload model weights
    :return: void
    """
    train_losses = []
    eval_losses = []

    # Reload weights if a restore file is specified.
    if restore_file:
        logger.write('[INFO] Restoring weights from file ' + restore_file)
        try:
            utils.load_checkpoint(restore_file, model, optimizer)
        except FileNotFoundError:
            print('[ERROR] Model weights file not found.')

    # Initialize the best validation metric: lower is better for RMSE,
    # higher is better otherwise.
    if params['validation_metric'] == 'RMSE':
        best_val_metric = np.Inf
    else:
        best_val_metric = 0.0

    for epoch in range(params['num_epochs']):
        # Train a single epoch on the training set.
        logger.write('[INFO] Training Epoch {}/{}'.format(epoch + 1,
                                                          params['num_epochs']))
        train_loss = train(model, optimizer, loss_fn, train_dataloader, metrics,
                           params, logger)
        train_losses.append(train_loss)

        # Evaluate a single epoch on the validation set.
        val_metrics, eval_loss = evaluate(model, loss_fn, val_dataloader, metrics,
                                          params, logger)
        eval_losses.append(eval_loss)
        val_metric = val_metrics[params['validation_metric']]

        # Determine whether this model is the best so far.
        if params['validation_metric'] == 'RMSE':
            is_best = val_metric <= best_val_metric
        else:
            is_best = val_metric >= best_val_metric

        # Save weights.
        utils.save_checkpoint(
            state={'epoch': epoch + 1,
                   'state_dict': model.state_dict(),
                   'optim_dict': optimizer.state_dict()},
            is_best=is_best,
            checkpoint=model_dir)

        # Save metrics for superior models.
        if is_best:
            logger.write('[INFO] New best {}: {}'.format(
                params['validation_metric'], val_metric))
            best_val_metric = val_metric
            best_json_path = os.path.join(model_dir, 'metrics_val_best_weights.json')
            utils.save_dict({params['validation_metric']: str(val_metric)},
                            best_json_path)

        # Save the latest metrics.
        last_json_path = os.path.join(model_dir, 'metrics_val_last_weights.json')
        utils.save_dict({params['validation_metric']: str(val_metric)},
                        last_json_path)

    # Save the learning plot.
    utils.plot_learning(train_losses, eval_losses, model_dir)

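# A hypothetical call of train_and_evaluate, for illustration only; the objects and
# directory below are placeholders, and only the 'num_epochs' and 'validation_metric'
# keys of params are taken from the code above:
#
#     params = {'num_epochs': 20, 'validation_metric': 'RMSE'}
#     train_and_evaluate(model, optimizer, torch.nn.MSELoss(), train_dl, val_dl,
#                        metrics, params, model_dir='experiments/base',
#                        logger=logger, restore_file=None)
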
              env=env, batch_size=64, layer1_size=200, layer2_size=200)

score_history = []
np.random.seed(0)
nepisodes = 1000

for ep in range(nepisodes):
    obs = env.reset()
    done = False
    score = 0
    # for i in range(1000):
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        # env.render()  # to be linked with ROS
    score_history.append(score)
    print('episode', ep, 'score %.2f' % score,
          '100 game average %.2f' % np.mean(score_history[-100:]))
    # Save the models every 200 episodes (note the parentheses: without them,
    # `ep + 1 % 200` would never equal 0).
    if (ep + 1) % 200 == 0:
        agent.save_models()

env.close()
roscore.terminate()

filename = 'ParrotDrone.png'
plot_learning(score_history, filename, window=100)
agent.save_models()

    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        agent.learn()
        observation = observation_
    scores.append(score)
    eps_history.append(agent.epsilon)
    avg_score = np.mean(scores[-100:])
    print('epoch ', i, 'score %.2f' % score,
          'average score %.2f' % avg_score,
          'epsilon %.2f' % agent.epsilon,
          'bin count %i' % env.bin_count)

next_fit = NextFit(max_simultaneously_bins)
first_fit = FirstFit(max_simultaneously_bins)
best_fit = BestFit(max_simultaneously_bins)
item_provider.reset()
while item_provider.has_next():
    item = item_provider.next()
    next_fit.put(item)
    first_fit.put(item)
    best_fit.put(item)

print('Next fit: ', next_fit.get_bin_count())
print('First fit: ', first_fit.get_bin_count())
print('Best fit: ', best_fit.get_bin_count())
print('Learned fit: ', env.bin_count)

x = [i + 1 for i in range(n_games)]
plot_learning(x, scores, eps_history, "%s_plot.png" % filename)