Code example #1
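# Assumed context (not shown in this excerpt): TensorFlow 1.x imported as tf, plus the
# project's own get_config, maze_env, agent module, policy_rollout helper and a configured logger.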
def main():
    args = get_config()

    env = maze_env(args)
    args.input_size = 12
    args.num_actions = 4

    session = tf.Session()
    # Select the two agents according to the experiment type
    # (S = selfish, N = naive, P = punishment, A = adaptive):
    if args.exp_type == 'SS':
        agent_one = agent.selfish_agent(args, session, name_scope='Selfish_A')
        agent_two = agent.selfish_agent(args, session, name_scope='Selfish_B')
    elif args.exp_type == 'SN':
        agent_one = agent.naive_agent(args, session, name_scope='Naive_A')
        agent_two = agent.selfish_agent(args, session, name_scope='Selfish_B')
    elif args.exp_type == 'SP':
        agent_one = agent.punishment_agent(args,
                                           session,
                                           name_scope='Punish_A')
        agent_two = agent.selfish_agent(args, session, name_scope='Selfish_B')
    elif args.exp_type == 'AN':
        agent_one = agent.adaptive_agent(args,
                                         session,
                                         name_scope='Adaptive_A')
        agent_two = agent.naive_agent(args, session, name_scope='Naive_B')
    elif args.exp_type == 'AS':
        agent_one = agent.adaptive_agent(args,
                                         session,
                                         name_scope='Adaptive_A')
        agent_two = agent.selfish_agent(args, session, name_scope='Selfish_B')
    elif args.exp_type == 'AA':
        agent_one = agent.adaptive_agent(args,
                                         session,
                                         name_scope='Adaptive_A')
        agent_two = agent.adaptive_agent(args,
                                         session,
                                         name_scope='Adaptive_B')

    session.run(tf.global_variables_initializer())

    # Training loop: collect a batch of rollouts per iteration, then one training step per agent
    for i_iteration in range(args.num_iteration):

        logger.info('Current Iteration {}'.format(i_iteration))

        for _ in range(args.episode_per_batch):

            policy_rollout(env, agent_one, agent_two)

        agent_one.train_step()
        agent_two.train_step()

    agent_one.save_npy(args.task, agent_one._name_scope)
    agent_two.save_npy(args.task, agent_two._name_scope)
Code example #2
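# Assumed context (not shown in this excerpt): logging, sys, numpy as np, and the project's
# own process_args, Defaults, maze_env and CRAR.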
HIGH_INT_DIM = True
N_SAMPLES = 200000
samples_transfer = 100
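# HIGHER_DIM_OBS is used below but is not defined in this excerpt; a boolean flag is assumed here
HIGHER_DIM_OBS = True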

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS)

    # --- Instantiate learning_algo ---
    learning_algo = CRAR(env,
                         parameters.rms_decay,
                         parameters.rms_epsilon,
                         parameters.momentum,
                         parameters.clip_norm,
                         parameters.freeze_interval,
                         parameters.batch_size,
                         parameters.update_rule,
                         rng,
                         double_Q=True,
                         high_int_dim=HIGH_INT_DIM,
                         internal_dim=3)
Code example #3
    logging.basicConfig(level=logging.INFO)
    
    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    input_nnet = "normal_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay)
    if parameters.deterministic:
        rng = np.random.RandomState(parameters.seed)
        print(" deterministic, seed: ",parameters.seed)
        input_nnet = "normal_seed" + str(parameters.seed) + "_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay) 
    else:
        rng = np.random.RandomState()
    if parameters.dumpname != "":
        input_nnet = parameters.dumpname
    print("input nnet= ", input_nnet)
    # --- Instantiate environment ---
    env = maze_env(rng, higher_dim_obs=parameters.high_dim_obs, show_game=False)
    
    # --- Instantiate learning_algo ---
    learning_algo = CRAR(
        env,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
        div_entrop_loss=1.)
    
    # ε-greedy policies: ε = 1.0 (fully random) for collecting training data, ε = 0.1 for evaluation
    train_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 0.1)

    # --- Instantiate agent ---
    agent = NeuralAgent(
Code example #4
File: q_learning.py  Project: taogougou/deep_learning
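# Assumed context (not shown in this excerpt): numpy as np, time, the project's maze_env,
# and the constants EPSILON, ALPHA, GAMMA and MAX_STEP defined elsewhere in the file.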
np.random.seed(0)


def epsilon_greedy(Q, state):

    # True if every Q-value for this state is still zero (i.e. the state is unexplored)
    unexplored = (Q[state, :] == 0).all()
    if (np.random.uniform() > 1 - EPSILON) or unexplored:
        action = np.random.randint(0, 4)  # random action index in 0..3
    else:
        # otherwise pick the action with the highest Q-value for this state
        action = Q[state, :].argmax()
    return action


e = maze_env()
Q = np.zeros((e.state_num, 4))
# print(Q.shape)

for i in range(200):
    e = maze_env()
    while (e.is_end is False) and (e.step < MAX_STEP):
        action = epsilon_greedy(Q, e.present_state)
        state = e.present_state
        reward = e.interact(action)
        new_state = e.present_state
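        # Tabular Q-learning update: move Q(s, a) toward reward + GAMMA * max_a' Q(s', a')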
        Q[state, action] = (1 - ALPHA) * Q[state, action] + \
            ALPHA * (reward + GAMMA * Q[new_state, :].max())
        print(Q)
        time.sleep(0.1)
    print('Episode:', i, 'Total steps:', e.step, 'Total reward:', e.total_reward)
Code example #5
File: run_maze_inverse.py  Project: geoffreyvd/deer
samples_transfer = 100


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    
    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()
    
    # --- Instantiate environment ---
    # env = maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS)
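    # reverse=True presumably builds the mirrored ("inverse") maze used for the transfer experiment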
    env = maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS, reverse=True)
    
    # --- Instantiate learning_algo ---
    learning_algo = CRAR(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
Code example #6
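    # Build the dump-file base names for the transferred and the normally trained networks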
    if parameters.deterministic:
        rng = np.random.RandomState(parameters.seed)
        print(" deterministic, seed: ",parameters.seed)
        input_transferred = "transferred_seed" + str(parameters.seed) + "_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay) 
        input_normal = "normal_seed" + str(parameters.seed) + "_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay) 
    else:
        rng = np.random.RandomState()
    if parameters.dumpname != "":
        input_transferred = parameters.dumpname + "_transferred"
        input_normal = parameters.dumpname
    print("input transferred nnet= ", input_transferred)
    print("input normal nnet= ", input_normal)

    
    # --- Instantiate environment ---
    env = maze_env(rng, higher_dim_obs=parameters.high_dim_obs)
    
    # --- Instantiate learning_algo ---
    learning_algo = CRAR(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,
Code example #7
File: run_maze_inverse.py  Project: geoffreyvd/deer
    fname += "_lr" + str(parameters.learning_rate) + "_lrd" + str(parameters.learning_rate_decay) 
    
    # Encode the transfer mode in the output file name
    if parameters.mode == 1:
        fname += "_resetencoder"
    if parameters.mode == 2:
        fname += "_partialfreezeencoder"
    # if parameters.mode == 3:
    #     # TODO: don't freeze, but use a very small learning rate for the other models

    print("saving nnet, plot and score under name=", fname)
    print("input nnet= ", input_nnet)


    # --- Instantiate environment ---
    # env = maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS)
    env = maze_env(rng, higher_dim_obs=parameters.high_dim_obs, reverse=True)
    
    # --- Instantiate learning_algo ---
    learning_algo = CRAR(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        double_Q=True,
        high_int_dim=HIGH_INT_DIM,
        internal_dim=3,