Example #1
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score, precision_score, f1_score


def fit_KFold(in_dim, no_classes, model_fn, X, y, X_val, y_val, K=5):
    folds = list(
        StratifiedKFold(n_splits=K, shuffle=True, random_state=1).split(X, y))

    for i, (idx_tr, idx_val) in enumerate(folds):
        print(f'\nFold: {i}')
        data_tr = (X[idx_tr], y[idx_tr])
        data_val = (X[idx_val], y[idx_val])

        name = f'models/final_model_fold_{i}.h5'
        callbacks = get_callbacks(name, data_tr, data_val)
        model = model_fn(in_dim, no_classes)
        model, hist = fit_model(model,
                                data_tr[0],
                                data_tr[1],
                                data_val[0],
                                data_val[1],
                                callbacks=callbacks,
                                epochs=30)
        plot_learning(hist.history, i)

        auc = evaluate_model(model, X_val, y_val)
        print(f'AUC score for fold {i}: {auc}')

        # threshold the sigmoid outputs at 0.5 to get hard class predictions
        preds = model.predict(X_val).round()
        print(recall_score(y_val, preds))
        print(precision_score(y_val, preds))
        print(f1_score(y_val, preds))
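The helpers used above (get_callbacks, fit_model, evaluate_model, plot_learning) are defined elsewhere in the project. As a hypothetical call-site sketch only, assuming X, y, X_val, y_val are NumPy arrays and a simple Keras binary classifier as model_fn:

from tensorflow import keras

def make_model(in_dim, no_classes):
    # hypothetical model factory: a small dense binary classifier
    model = keras.Sequential([
        keras.Input(shape=(in_dim,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=[keras.metrics.AUC()])
    return model

fit_KFold(X.shape[1], 2, make_model, X, y, X_val, y_val, K=5)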
Example #2
    ep_step += 1

    scores.append(score)
    steps.append(episode)
    epsilons.append(agent.eps)
    average_score = np.mean(scores[-40:])
    print('Episode:', episode, ' Score: %.1f' % score,
          ' Average: %.1f' % average_score,
          ' Explore probability: %.2f' % agent.eps,
          ' Best average: %.2f' % best_score)

    if average_score > best_score:
        print('Best average score %.1f!' % average_score)
        best_score = average_score
        agent.save()
        plot_learning(steps, scores, epsilons, "pixel_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("pixel_log.csv", sep=';', index=False)

    if episode % 50 == 0:
        plot_learning(steps, scores, epsilons, "pixel_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("pixel_log.csv", sep=';', index=False)
Example #3
                 input_dims=env.observation_space.shape,
                 n_actions=env.action_space.n,
                 mem_size=1000000,
                 batch_size=64,
                 epsilon_end=0.01)
scores = []
eps_history = []

for i in range(n_games):
    done = False
    score = 0
    observation = env.reset()
    while not done:
        # env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        observation = observation_
        agent.learn()
    eps_history.append(agent.epsilon)
    scores.append(score)

    avg_score = np.mean(scores[-100:])
    print('episode: ', i, 'score %.2f' % score,
          'average_score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)

filename = 'lunar_lander.png'
x = [i + 1 for i in range(n_games)]
plot_learning(x, scores, eps_history, filename)
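The Agent class is not part of this snippet; its choose_action is presumably epsilon-greedy over the Q-network's outputs. A rough, generic sketch only (q_of stands in for the agent's Q-network forward pass; the names here are illustrative, not the original API):

import numpy as np

def epsilon_greedy_action(q_of, observation, n_actions, epsilon):
    # explore with probability epsilon, otherwise act greedily on the Q-values
    if np.random.random() < epsilon:
        return np.random.randint(n_actions)
    return int(np.argmax(q_of(observation)))

# e.g. action = epsilon_greedy_action(q_net, observation, env.action_space.n, agent.epsilon)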
Example #4
import numpy as np
from matplotlib import pyplot

# project-local modules, imported here under the names used in the function body
import api
import utils
import v


def learn(q_table,
          worldId=0,
          mode='train',
          learning_rate=0.001,
          gamma=0.9,
          epsilon=0.9,
          good_term_states=[],
          bad_term_states=[],
          epoch=0,
          obstacles=[],
          run_num=0,
          verbose=True):
    '''
    ~MAIN LEARNING FUNCTION~
    takes in:
    -the Q-table data structure (numpy 3-dimensional array)
    -worldId (for api and plotting)
    -mode (train or exploit)
    -learning rate (affects q-table calculation)
    -gamma (weighting of the rewards)
    -epsilon (determines the amount of random exploration the agent does)
    -good_term_states
    -bad_term_states
    -epoch
    -obstacles
    -run number
    -verbosity

    returns: q_table [NumPy array], good_term_states [list], bad_term_states [list], obstacles [list]
    '''

    #create the api instance
    a = api.API(worldId=worldId)
    w_res = a.enter_world()

    if verbose: print("w_res: ", w_res)

    #init terminal state reached
    terminal_state = False

    #create a var to track the type of terminal state
    good = False

    #accumulate the rewards so far for plotting reward over step
    rewards_acquired = []

    #find out where we are
    loc_response = a.locate_me()

    #create a list of everywhere we've been for the viz
    visited = []

    if verbose: print("loc_response", loc_response)

    #OK response looks like {"code":"OK","world":"0","state":"0:2"}
    if loc_response["code"] != "OK":
        print(
            f"something broke on locate_me call \nresponse lookes like: {loc_response}"
        )
        return -1

    # convert JSON into a tuple (x,y)
    location = int(loc_response["state"].split(':')[0]), int(
        loc_response["state"].split(':')[1])  #location is a tuple (x, y)

    # SET UP FIGURE FOR VISUALIZATION.
    pyplot.figure(1, figsize=(10, 10))
    curr_board = [[float('-inf')] * 40 for temp in range(40)]

    #keep track of where we've been for the visualization
    visited.append(location)
    while True:
        #////////////////// CODE FOR VISUALIZATION
        curr_board[location[1]][location[0]] = 1
        for i in range(len(curr_board)):
            for j in range(len(curr_board)):
                if (curr_board[i][j] != 0):
                    curr_board[i][j] -= .1
        #prune any provisional obstacles we have since visited
        #(rebuild the list instead of removing items while iterating over it)
        obstacles[:] = [o for o in obstacles if o not in visited]
        v.update_grid(curr_board, good_term_states, bad_term_states, obstacles,
                      run_num, epoch, worldId, location, verbose)
        #//////////////// END CODE FOR VISUALIZATION

        #in q-table, get index of best option for movement based on our current state in the world
        if mode == 'train':
            #use an epsilon-greedy approach to randomly explore or exploit
            if np.random.uniform() < epsilon:
                unexplored = np.where(
                    q_table[location[0]][location[1]].astype(int) == 0)[0]
                explored = np.where(
                    q_table[location[0]][location[1]].astype(int) != 0)[0]

                if unexplored.size != 0:
                    move_num = int(np.random.choice(unexplored))
                else:
                    move_num = int(np.random.choice(explored))
            else:
                move_num = np.argmax(q_table[location[0]][location[1]])

        else:
            #mode is exploit -we'll use what we already have in the q-table to decide on our moves
            move_num = np.argmax(q_table[location[0]][location[1]])

        #make the move - transition into a new state
        move_response = a.make_move(move=num_to_move(move_num),
                                    worldId=str(worldId))

        if verbose: print("move_response", move_response)
        #OK response looks like {"code":"OK","worldId":0,"runId":"931","reward":-0.1000000000,"scoreIncrement":-0.0800000000,"newState":{"x":"0","y":3}}

        if move_response["code"] != "OK":
            #handle the unexpected
            print(
                f"something broke on make_move call \nresponse looks like: {move_response}"
            )

            move_failed = True
            while move_failed:
                move_response = a.make_move(move=num_to_move(move_num),
                                            worldId=str(worldId))

                print("\n\ntrying move again!!\n\n")

                if move_response["code"] == 'OK':
                    move_failed = False

        # check that we're not in a terminal state, and if not convert new location JSON into tuple
        if move_response["newState"] is not None:
            #we're now in new_loc, which will be a tuple of where we are according to the API
            #KEEP IN MIND the movement of our agent is apparently STOCHASTIC
            new_loc = int(move_response["newState"]["x"]), int(
                move_response["newState"]["y"])  #tuple (x,y)

            # keep track of if we hit any obstacles
            expected_loc = list(location)

            #convert the move we tried to make into an expected location where we think we'll end up (expected_loc)
            recent_move = num_to_move(move_num)

            if recent_move == "N":
                expected_loc[1] += 1
            elif recent_move == "S":
                expected_loc[1] -= 1
            elif recent_move == "E":
                expected_loc[0] += 1
            elif recent_move == "W":
                expected_loc[0] -= 1

            expected_loc = tuple(expected_loc)

            if verbose:
                print(f"New Loc: {new_loc} (where we actually are now):")
            if verbose:
                print(
                    f"Expected Loc: {expected_loc} (where we thought we were going to be):"
                )

            if (mode == "train"):
                obstacles.append(expected_loc)

            #continue to track where we have been
            visited.append(new_loc)

            #if we placed an obstacle there in the vis, remove it
            obstacles[:] = [o for o in obstacles if o not in visited]

        else:
            #we hit a terminal state
            terminal_state = True
            print(
                "\n\n--------------------------\nTERMINAL STATE ENCOUNTERED\n--------------------------\n\n"
            )

        #get the reward for the most recent move we made
        reward = float(move_response["reward"])

        #add reward to plot
        rewards_acquired.append(reward)

        #if we are training the model then update the q-table for the state we were in before,
        #using the Bellman equation (Q-learning update)
        if mode == "train":
            update_q_table(location, q_table, reward, gamma, new_loc,
                           learning_rate, move_num)

        #update our current location variable to our now current location
        location = new_loc

        #if we are in a terminal state then we need to collect the information for our visualization
        #and we need to end our current training epoch
        if terminal_state:
            print(f"Terminal State REWARD: {reward}")

            if reward > 0:
                #we hit a positive reward so keep track of it as a good reward terminal-state
                good = True
            if location not in good_term_states and location not in bad_term_states:
                #update our accounting of good and bad terminal states for the visualization
                if good:
                    good_term_states.append(location)
                else:
                    bad_term_states.append(location)

            #update our visualization a last time before moving onto the next epoch
            v.update_grid(curr_board, good_term_states, bad_term_states,
                          obstacles, run_num, epoch, worldId, location,
                          verbose)
            break

    #possibly not needed but this separates out the plot
    pyplot.figure(2, figsize=(5, 5))
    #cumulative average for plotting reward by step over time purposes
    cumulative_average = np.cumsum(rewards_acquired) / (
        np.arange(len(rewards_acquired)) + 1)
    # plot reward over each step of the agent
    utils.plot_learning(worldId, epoch, cumulative_average, run_num)

    return q_table, good_term_states, bad_term_states, obstacles
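update_q_table and num_to_move are defined elsewhere in the project; the comments above refer to the Bellman update, so here is a minimal sketch of the standard Q-learning rule that update_q_table presumably applies (illustrative only, matching the call signature used above):

import numpy as np

def update_q_table(location, q_table, reward, gamma, new_loc, learning_rate, move_num):
    # Q-learning (Bellman) update for the state/action we just left:
    # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
    x, y = location
    nx, ny = new_loc
    old_q = q_table[x][y][move_num]
    best_next = np.max(q_table[nx][ny])
    q_table[x][y][move_num] = old_q + learning_rate * (reward + gamma * best_next - old_q)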
Example #5
        ep_step += 1

    scores.append(score)
    steps.append(episode)
    epsilons.append(agent.eps)
    average_score = np.mean(scores[-40:])
    print('Episode:', episode, ' Score: %.1f' % score,
          ' Average: %.1f' % average_score,
          ' Explore probability: %.2f' % agent.eps,
          ' Best average: %.2f' % best_score)

    if average_score > best_score:
        print('Best average score %.1f!' % average_score)
        best_score = average_score
        agent.save()
        plot_learning(steps, scores, epsilons, "ram_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("ram_log.csv", sep=';', index=False)

    if episode % 50 == 0:
        plot_learning(steps, scores, epsilons, "ram_wykres")
        df = pandas.DataFrame(data={
            "step": steps,
            "score": scores,
            "epsilon": epsilons
        })
        df.to_csv("ram_log.csv", sep=';', index=False)
Example #6
import os

import numpy as np

import utils  # project-local helpers (checkpointing, plotting), name as used below


def train_and_evaluate(model, optimizer, loss_fn, train_dataloader,
                       val_dataloader, metrics, params, model_dir, logger,
                       restore_file=None):
    """
    Train the model and evaluate on a validation dataset using the parameters
    specified in the params file path.
    :param model: (torch.nn.Module) the model to be trained
    :param optimizer: (torch.optim)
    :param loss_fn: (nn.MSEloss or nn.CrossEntropyLoss)
    :param train_dataloader: (torch.utils.data.Dataloader)
    :param val_dataloader: (torch.utils.data.Dataloader)
    :param metrics: (dict) metrics to be computed
    :param params: (dict) model parameters
    :param model_dir: (str) directory to output model performance
    :param restore_file: (str) path to model reload model weights
    :return: void
    """

    train_losses = []
    eval_losses = []

    # Reload weights if specified
    if restore_file:
        try:
            utils.load_checkpoint(restore_file, model, optimizer)
        except FileNotFoundError:
            print('[ERROR] Model weights file not found.')
        else:
            logger.write('[INFO] Restoring weights from file ' + restore_file)

    # Initialize the best validation metric (lower is better for RMSE)
    if params['validation_metric'] == 'RMSE':
        best_val_metric = np.inf
    else:
        best_val_metric = 0.0

    for epoch in range(params['num_epochs']):
        # Train single epoch on the training set
        logger.write('[INFO] Training Epoch {}/{}'.format(epoch + 1, params['num_epochs']))
        train_loss = train(
            model, optimizer, loss_fn, train_dataloader, metrics, params, logger)
        train_losses.append(train_loss)

        # Evaluate single epoch on the validation set
        val_metrics, eval_loss = evaluate(
            model, loss_fn, val_dataloader, metrics, params, logger)
        eval_losses.append(eval_loss)
        val_metric = val_metrics[params['validation_metric']]

        # Determine if model is superior
        if params['validation_metric'] == 'RMSE':
            is_best = val_metric <= best_val_metric
        else:
            is_best = val_metric >= best_val_metric

        # Save weights
        utils.save_checkpoint(
            state={'epoch': epoch + 1,
                   'state_dict': model.state_dict(),
                   'optim_dict': optimizer.state_dict()},
            is_best=is_best, checkpoint=model_dir)

        # Save superior models
        if is_best:
            logger.write('[INFO] New best {}: {}'.format(
                params['validation_metric'], val_metric))
            best_val_metric = val_metric

            # Save best val metrics
            best_json_path = os.path.join(
                model_dir, 'metrics_val_best_weights.json')
            utils.save_dict(
                {params['validation_metric']: str(val_metric)},
                best_json_path)

        # Save metrics
        last_json_path = os.path.join(
            model_dir, 'metrics_val_last_weights.json')
        utils.save_dict(
            {params['validation_metric']: str(val_metric)}, last_json_path)

    # Save learning plot
    utils.plot_learning(train_losses, eval_losses, model_dir)
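The params dict is only loosely described in the docstring; based on the keys the function body actually reads, a minimal configuration and call could look like this (the model_dir path and the surrounding objects are placeholders for the caller's own setup):

params = {
    'num_epochs': 20,             # used by the epoch loop
    'validation_metric': 'RMSE',  # 'RMSE' switches to a lower-is-better comparison
}

train_and_evaluate(model, optimizer, loss_fn, train_dataloader, val_dataloader,
                   metrics, params, model_dir='experiments/run1',
                   logger=logger, restore_file=None)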
Example #7
                  env=env,
                  batch_size=64,
                  layer1_size=200,
                  layer2_size=200)

    score_history = []
    np.random.seed(0)
    nepisodes = 1000
    for ep in range(nepisodes):
        obs = env.reset()
        done = False
        score = 0
        #for i in range(1000):
        while not done:
            act = agent.choose_action(obs)
            new_state, reward, done, info = env.step(act)
            agent.remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward
            obs = new_state
            #env.render() To be linked with ROS
        score_history.append(score)
        print('episode', ep, 'score %.2f' % score,
              '100 game average %.2f' % np.mean(score_history[-100:]))
        if (ep + 1) % 200 == 0:
            agent.save_models()
    env.close()
    roscore.terminate()
    filename = 'ParrotDrone.png'
    plot_learning(score_history, filename, window=100)
    agent.save_models()
Example #8
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        agent.learn()
        observation = observation_
    scores.append(score)
    eps_history.append(agent.epsilon)
    avg_score = np.mean(scores[-100:])
    print('epoch ', i, 'score %.2f' % score, 'average score %.2f' % avg_score,
          'epsilon %.2f' % agent.epsilon, 'bin count %i' % env.bin_count)

next_fit = NextFit(max_simultaneously_bins)
first_fit = FirstFit(max_simultaneously_bins)
best_fit = BestFit(max_simultaneously_bins)
item_provider.reset()
while item_provider.has_next():
    item = item_provider.next()
    next_fit.put(item)
    first_fit.put(item)
    best_fit.put(item)

print('Next fit:    ', next_fit.get_bin_count())
print('First fit:   ', first_fit.get_bin_count())
print('Best fit:    ', best_fit.get_bin_count())
print('Learned fit: ', env.bin_count)

x = [i + 1 for i in range(n_games)]
plot_learning(x, scores, eps_history, "%s_plot.png" % filename)