Example #1
             kernel_regularizer=l2(1e-4),
             data_format='channels_first')(cv2)
dense = Flatten()(cv3)
dense = Dense(512, activation='relu', kernel_regularizer=l2(1e-4))(dense)
buttons = Dense(nb_actions, activation='linear',
                kernel_regularizer=l2(1e-4))(dense)
model = Model(inputs=frame, outputs=buttons)
model.summary()

processor = AtariDQfDProcessor()

# record_demo_data('HeroDeterministic-v4', steps=50000, data_filepath='hero_expert.npy', frame_delay=0.03)

# Load and process the demonstration data.
expert_demo_data = processor.process_demo_data(
    load_demo_data_from_file('hero_expert.npy'))
# Replay memory pre-loaded with the processed demonstrations; alpha and beta
# follow the usual prioritized-replay conventions (priority exponent and
# importance-sampling exponent).
memory = PartitionedMemory(limit=1000000,
                           pre_load_data=expert_demo_data,
                           alpha=.4,
                           start_beta=.6,
                           end_beta=.6,
                           window_length=WINDOW_LENGTH)

# Near-greedy policy: act randomly with probability 0.01.
policy = EpsGreedyQPolicy(.01)

dqfd = DQfDAgent(model=model,
                 nb_actions=nb_actions,
                 policy=policy,
                 memory=memory,
                 processor=processor,
                 enable_double_dqn=True,
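
The excerpt opens mid-model: the input layer and the convolutions that produce cv2 are cut off, and its first two lines are the tail of the cv3 definition. A minimal sketch of the missing head, assuming the standard Atari DQN convolution stack; frame, cv1, and the 84x84 input shape are assumptions, not confirmed by the excerpt:

# Hypothetical reconstruction of the truncated model head (shapes assumed).
from keras.layers import Input, Conv2D
from keras.regularizers import l2

frame = Input(shape=(WINDOW_LENGTH, 84, 84))  # stacked grayscale frames
cv1 = Conv2D(32, 8, strides=4, activation='relu',
             kernel_regularizer=l2(1e-4),
             data_format='channels_first')(frame)
cv2 = Conv2D(64, 4, strides=2, activation='relu',
             kernel_regularizer=l2(1e-4),
             data_format='channels_first')(cv1)
cv3 = Conv2D(64, 3, strides=1, activation='relu',  # the excerpt resumes here
             kernel_regularizer=l2(1e-4),
             data_format='channels_first')(cv2)
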
Example #2
student_model = Model(inputs=sensors, outputs=s_actions)
# "Expert" (regular dqn) model architecture
e_dense = Flatten()(sensors)
e_dense = Dense(64, activation='relu')(e_dense)
e_dense2 = Dense(128, activation='relu')(e_dense)
e_dense3 = Dense(64, activation='relu')(e_dense2)
e_actions = Dense(nb_actions, activation='linear')(e_dense3)
expert_model = Model(inputs=sensors, outputs=e_actions)

processor = RocketProcessor()
model_saves = './model_saves/'

if __name__ == "__main__":
    if args.model == 'student':
        # load expert data
        expert_demo_data = load_demo_data_from_file(model_saves + 'demos.npy')
        expert_demo_data = reward_threshold_subset(expert_demo_data, 0)
        print(expert_demo_data.shape)
        expert_demo_data = processor.process_demo_data(expert_demo_data)
        # memory
        memory = PartitionedMemory(limit=500000,
                                   pre_load_data=expert_demo_data,
                                   alpha=.6,
                                   start_beta=.4,
                                   end_beta=.4,
                                   window_length=WINDOW_LENGTH)
        # policy
        policy = EpsGreedyQPolicy(.01)
        # agent
        dqfd = DQfDAgent(model=student_model,
                         nb_actions=nb_actions,
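
The student network that produces sensors and s_actions is cut off above the student_model line. A plausible sketch, assuming the student mirrors the expert MLP shown in the excerpt; the input shape nb_sensors is an assumption:

# Hypothetical student head mirroring the expert architecture above.
from keras.layers import Input, Flatten, Dense
from keras.models import Model

sensors = Input(shape=(WINDOW_LENGTH, nb_sensors))  # nb_sensors is assumed
s_dense = Flatten()(sensors)
s_dense = Dense(64, activation='relu')(s_dense)
s_dense2 = Dense(128, activation='relu')(s_dense)
s_dense3 = Dense(64, activation='relu')(s_dense2)
s_actions = Dense(nb_actions, activation='linear')(s_dense3)
student_model = Model(inputs=sensors, outputs=s_actions)
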
Example #3
plot_model(curiosity_inverse_model,
           show_shapes=True,
           to_file=plot_file_prefix + 'curiosity_inverse_model.png')
######## END INVERSE MODEL ################

model_saves = './demonstrations/'

now = datetime.now()
datestr = now.strftime("%m%d_%H%M%S")
filename_append = args.filename_append
environment_name = args.env

if __name__ == "__main__":
    if args.model == 'student':
        # load expert data
        expert_demo_data = load_demo_data_from_file(model_saves +
                                                    demonstrations_file)
        expert_demo_data = reward_threshold_subset(expert_demo_data, 0)
        print(expert_demo_data.shape)
        expert_demo_data = processor.process_demo_data(expert_demo_data)
        # memory
        memory = PartitionedMemory(limit=500000,
                                   pre_load_data=expert_demo_data,
                                   alpha=.6,
                                   start_beta=.4,
                                   end_beta=.4,
                                   window_length=WINDOW_LENGTH)
        # policy
        policy = EpsGreedyQPolicy(.01)
        # agent
        dqfd = DQfDAgent(model=student_model,
                         nb_actions=nb_actions,
Example #4
    episode_start = 0
    for i, transition in enumerate(demo_array):
        # Reward and terminal flag sit in the last two slots of a transition.
        reward = transition[-2]
        # Log-scale the reward to compress large magnitudes while keeping sign.
        reward = np.sign(reward) * np.log(1 + abs(reward))
        episode_total += reward
        if transition[-1]:  # terminal: close out the episode
            # Every transition in the episode gets the episode's total return.
            episode_rs[episode_start:i + 1] = episode_total
            episode_total = 0
            episode_start = i + 1

    return episode_rs
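
calc_ep_rs is shown from the middle of its body; episode_rs and episode_total are initialized in lines the excerpt drops. A plausible head for the function, assuming demo_array holds one transition per row with reward and terminal flag in the last two columns:

import numpy as np

def calc_ep_rs(demo_array):
    # One slot per transition; each slot receives its episode's total return.
    episode_rs = np.zeros(len(demo_array))
    episode_total = 0
    # ... the loop shown in the excerpt follows from here.
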


def reward_threshold_subset(demo_array, reward_min):
    # Keep only transitions belonging to episodes whose total (log-scaled)
    # return exceeds reward_min.
    rs = calc_ep_rs(demo_array)
    cropped_demos = []
    for i, transition in enumerate(demo_array):
        if rs[i] > reward_min:
            cropped_demos.append(transition)
    return np.array(cropped_demos)


def demo_avg(demo_array):
    # calc_ep_rs repeats each episode's return once per transition; set()
    # collapses the repeats (note: episodes with identical returns merge).
    episode_returns = np.array(list(set(calc_ep_rs(demo_array))))
    return np.mean(episode_returns)


if __name__ == "__main__":
    record_demo_data('RocketLander-v0', steps=50000)
    print(len(load_demo_data_from_file()))
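
Taken together, these helpers support a simple filtering workflow: record demonstrations, keep only episodes whose log-scaled return clears a threshold, and report the per-episode average. A short usage sketch; the 'demos.npy' path is an assumption based on the earlier excerpts:

# Keep only transitions from positive-return episodes.
demos = load_demo_data_from_file('demos.npy')
good_demos = reward_threshold_subset(demos, reward_min=0)
print('kept %d of %d transitions' % (len(good_demos), len(demos)))
print('mean episode return (log-scaled):', demo_avg(demos))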