def main():
    """
    This function will be called for the training phase.
    """
    global epsilon
    global memory
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)

    env.make_interactive(port=6666, realtime=True)

    aicrowd_helper.training_start()
    episodes = 1024
    trajectory = data.load_data("v3_excellent_pluot_behemoth-4_3461-4804")
    for episode in range(episodes):
        obs = env.reset()
        done = False
        netr = 0

        with alive_bar(title=f"episode: {episode}") as bar:
            bar.text("replaying trajectory")
            for state, action, reward, next_state, done in trajectory:
                obs, reward, done, info = env.step(action)
                bar()
            i = 0
            bar.text("testing inputs")
            while not done:
                print(i % 64)
                action = env.action_space.noop()
                vec = np.zeros((64,))
                vec[i % 64] = -0.5
                action["vector"] = vec
                obs, reward, done, info = env.step(action)
                netr += reward
                bar()
                i += 1

        aicrowd_helper.register_progress(episode / episodes)

    # Save trained model to train/ directory
    # Training 100% Completed
    aicrowd_helper.register_progress(1)
    aicrowd_helper.training_end()
    env.close()
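
A minimal sketch of the data sampling that the comment at the top of this example links to, assuming one of the obfuscated-vector environments; the environment name and data directory below are placeholders, not values taken from the example.

import minerl

# Placeholder environment name and data directory for illustration.
data = minerl.data.make("MineRLObtainDiamondVectorObf-v0", data_dir="data")

# Every recorded demonstration has a stream name like the one hard-coded above.
names = data.get_trajectory_names()
print(names[:3])

# load_data yields (state, action, reward, next_state, done) tuples, where states
# and actions are dicts of numpy arrays (keys such as "pov" and "vector").
for state, action, reward, next_state, done in data.load_data(names[0]):
    print(state["pov"].shape, action["vector"].shape, reward)
    break
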
Example #2
def main():
    """
    This function will be called for the training phase.
    """
    global epsilon
    global memory
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)

    # pre train
    if (checkpoint_dir / "pretrain.h5").exists():
        print("Loading pretrain weights")
        model.load_weights(checkpoint_dir / "pretrain.h5")
    else:
        with alive_bar(title="pretrain", calibrate=120) as bar:
            for current_state, action, reward, next_state, done in data.batch_iter(batch_size=2, num_epochs=5, seq_len=32):
                loss = model.train_on_batch([current_state["pov"].reshape(-1, 64, 64, 3), current_state["vector"].reshape(-1, 64)], action["vector"].reshape(-1, 64))
                bar.text(f"loss: {loss}")
                bar()
        model.save_weights(checkpoint_dir / "pretrain.h5")
    model_target.set_weights(model.get_weights())

    env.make_interactive(port=6666)

    aicrowd_helper.training_start()
    frame_count = 0
    episodes = 1024
    for episode in range(episodes):
        if (checkpoint_dir / f"episode-{episode}.h5").exists():
            if not (checkpoint_dir / f"episode-{episode + 1}.h5").exists():
                model.load_weights(checkpoint_dir / f"episode-{episode}.h5")
            if epsilon > epsilon_min:
                epsilon -= (epsilon_start - epsilon_min) / explore_ts
            frame_count += 6000
            continue

        obs = env.reset()
        done = False
        netr = 0

        epoch_loss = []
        loss = None  # defined up-front so the progress print below never hits an undefined name
        with alive_bar(title=f"episode: {episode}") as bar:
            while not done:
                explore = np.random.rand() < epsilon
                if explore:
                    bar.text("perform action: explore")
                    action = env.action_space.sample()
                else:
                    bar.text("perform action: predict")
                    action = env.action_space.noop()
                    action["vector"] = model.predict([obs["pov"].reshape(-1, 64, 64, 3), obs["vector"].reshape(-1, 64)])[0]
                new_obs, reward, done, info = env.step(action)
                netr += reward

                memory.append((obs, action, reward, new_obs, done))
                # Make sure we restrict memory size to specified limit
                if len(memory) > memory_size:
                    memory.pop(0)

                if frame_count % train_interval == 0:
                    bar.text("training: build replay")
                    replay = random.sample(memory, min(batch_size, len(memory)))
                    states_pov = np.array([a[0]["pov"] for a in replay]).reshape(-1, 64, 64, 3)
                    states_vector = np.array([a[0]["vector"] for a in replay]).reshape(-1, 64)
                    # new_states_pov = np.array([a[3]["pov"] for a in replay]).reshape(-1, 64, 64, 3)
                    # new_states_vector = np.array([a[3]["vector"] for a in replay]).reshape(-1, 64)

                    # Predict the expected utility of current state and new state
                    bar.text("training: predict Q")
                    Q = model_target.predict([states_pov, states_vector])
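                    # Note: the next-state arrays are commented out above, so this target
                    # is formed from the target network's Q-values for the *current* states.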
                    Q_new = [a[2] for a in replay] + gamma * tf.reduce_max(
                        Q, axis=1
                    )

                    # masks = tf.one_hot([a[1]["vector"] for a in replay], 64)

                    bar.text("training: backprop")
                    with tf.GradientTape() as tape:
                        # Train the model on the states and updated Q-values
                        q_values = model([states_pov, states_vector])

                        # Apply the masks to the Q-values to get the Q-value for the action taken
                        # q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
                        q_action = tf.reduce_sum(q_values, axis=1)
                        # Calculate loss between the new Q-value and the old Q-value
                        loss = loss_function(Q_new, q_action)
                    # Backpropagate outside the tape context so the gradient pass itself is not recorded
                    grads = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(grads, model.trainable_variables))
                    epoch_loss.append(loss)

                if epsilon > epsilon_min:
                    epsilon -= (epsilon_start - epsilon_min) / explore_ts
                print("explore:", explore, "net reward:", netr, "loss:", loss, "epsilon:", epsilon)
                bar()
                obs = new_obs
                if frame_count % target_update_interval == 0:
                    print("updated target model")
                    model_target.set_weights(model.get_weights())
                frame_count += 1
        model.save_weights(checkpoint_dir / f"episode-{episode}.h5")

        aicrowd_helper.register_progress(episode / episodes)

    # Save trained model to train/ directory
    # Training 100% Completed
    aicrowd_helper.register_progress(1)
    aicrowd_helper.training_end()
    env.close()
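
For comparison with the replay update above, a minimal sketch of the textbook one-step DQN target, written with hypothetical arrays; unlike the example, it includes the terminal-state mask and would be evaluated on the target network's next-state Q-values.

import tensorflow as tf

def dqn_target(rewards, dones, q_next, gamma=0.99):
    # rewards: (batch,), dones: (batch,) booleans, q_next: (batch, num_actions)
    # produced by the target network for the next states.
    rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
    not_terminal = 1.0 - tf.cast(dones, tf.float32)
    # Terminal transitions contribute only the immediate reward.
    return rewards + gamma * not_terminal * tf.reduce_max(q_next, axis=1)
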
Example #3
import aicrowd_helper
import train_submission_code
import test_framework

import os
EVALUATION_RUNNING_ON = os.getenv('EVALUATION_RUNNING_ON', None)
EVALUATION_STAGE = os.getenv('EVALUATION_STAGE', 'all')
EXITED_SIGNAL_PATH = os.getenv('EXITED_SIGNAL_PATH', 'shared/exited')

# Training Phase
if EVALUATION_STAGE in ['all', 'training']:
    aicrowd_helper.training_start()
    try:
        train_submission_code.main()
        aicrowd_helper.training_end()
    except Exception as e:
        aicrowd_helper.training_error()
        print(e)

# Testing Phase
if EVALUATION_STAGE in ['all', 'testing']:
    if EVALUATION_RUNNING_ON in ['local']:
        try:
            os.remove(EXITED_SIGNAL_PATH)
        except FileNotFoundError:
            pass
    aicrowd_helper.inference_start()
    try:
        test_framework.main()
        aicrowd_helper.inference_end()
    except Exception as e:
        aicrowd_helper.inference_error()
        print(e)
Example #4
def main():
    malmo_base_port = FLAGS.malmo_base_port
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpus
    malmo.InstanceManager.configure_malmo_base_port(malmo_base_port)

    observation_space = CustomObservationSpace(
        pov_resolution=FLAGS.pov_resolution,
        pov_color_space=FLAGS.pov_color_space)

    action_space = CustomActionSpace(
        num_camera_actions=FLAGS.num_camera_actions,
        camera_max_angle=FLAGS.camera_max_angle)

    def combined_actor_critic_agent():
        return ResnetLSTMAgent(observation_space=observation_space,
                               action_space=action_space,
                               max_step_mul=FLAGS.max_step_mul,
                               core_hidden_size=FLAGS.lstm_hidden_size,
                               use_prev_actions=FLAGS.use_prev_actions,
                               action_embed_type=FLAGS.action_embed_type,
                               action_embed_size=FLAGS.action_embed_size)

    def separate_actor_critic_agent():
        return SeparateActorCriticWrapperAgent(
            actor=combined_actor_critic_agent(),
            critic=combined_actor_critic_agent())

    if FLAGS.separate_actor_critic:
        agent_fn = separate_actor_critic_agent
    else:
        agent_fn = combined_actor_critic_agent

    log_dir = FLAGS.logdir

    # Training Phase
    if EVALUATION_STAGE in ['all', 'training']:
        # only write out flags when training
        pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)
        FLAGS.append_flags_into_file(
            f'{log_dir}/flags_{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.cfg'
        )

        aicrowd_helper.training_start()
        try:
            train.main(log_dir=log_dir,
                       load_dir=FLAGS.loaddir,
                       observation_space=observation_space,
                       action_space=action_space,
                       max_step_mul=FLAGS.max_step_mul,
                       fixed_step_mul=FLAGS.fixed_step_mul,
                       step_mul=FLAGS.step_mul,
                       agent_fn=agent_fn,
                       seed=FLAGS.train_seed,
                       malmo_base_port=malmo_base_port)
            aicrowd_helper.training_end()
        except Exception as e:
            aicrowd_helper.training_error()
            print(traceback.format_exc())
            print(e)

    # Testing Phase
    if EVALUATION_STAGE in ['all', 'testing']:
        if EVALUATION_RUNNING_ON in ['local']:
            try:
                os.remove(EXITED_SIGNAL_PATH)
            except FileNotFoundError:
                pass
        aicrowd_helper.inference_start()
        try:
            test.main(log_dir=log_dir,
                      test_model=FLAGS.test_model,
                      observation_space=observation_space,
                      action_space=action_space,
                      fixed_step_mul=FLAGS.fixed_step_mul,
                      step_mul=FLAGS.step_mul,
                      agent_fn=agent_fn)
            aicrowd_helper.inference_end()
        except Exception as e:
            aicrowd_helper.inference_error()
            print(traceback.format_exc())
            print(e)
        if EVALUATION_RUNNING_ON in ['local']:
            from pathlib import Path
            Path(EXITED_SIGNAL_PATH).touch()

    # Launch instance manager
    if EVALUATION_STAGE in ['manager']:
        from minerl.env.malmo import launch_instance_manager
        launch_instance_manager()
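
Example #4 appears to read its configuration from absl-style flags; below is a minimal sketch of how such flags are typically declared and dumped with append_flags_into_file, using illustrative flag names that mirror a few used above (the defaults are placeholders, not the example's values).

import pathlib
from absl import app, flags

flags.DEFINE_integer("malmo_base_port", 9000, "Base port for Malmo instances.")
flags.DEFINE_string("logdir", "logs", "Directory for checkpoints and flag dumps.")
flags.DEFINE_boolean("separate_actor_critic", False, "Use separate actor and critic networks.")
FLAGS = flags.FLAGS

def main(argv):
    pathlib.Path(FLAGS.logdir).mkdir(parents=True, exist_ok=True)
    # Writes the current flag values in --flagfile format, much like the launcher above.
    FLAGS.append_flags_into_file(f"{FLAGS.logdir}/flags.cfg")

if __name__ == "__main__":
    app.run(main)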