Example #1
import gym
import matplotlib.pyplot as plt

# nets, Agent, expt, Memory, and the representation functions (onehot, random,
# place_cell, sr, latents) are assumed to be imported from the surrounding project
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': latents
}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims,
                                         hidden_dims=200,
                                         output_dims=env.action_space.n,
                                         lr=learning_rate)

memory = None  # episodic memory disabled here; to enable: Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)
#from torchsummary import summary
#print(summary(AC_head_agent, (2,20,20)))

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
run.record_log(env_name,
               representation_name,
               num_trials,
               num_events,
               dir=directory)
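
Each function in `rep_types` is expected to return a 4-tuple of `(state_reps, representation_name, input_dims, _)`. As a rough illustration of that contract, below is a minimal sketch of a one-hot builder; the function name, the toy environment, and the return values are assumptions for illustration, not the project's actual implementation.

import numpy as np

def onehot_sketch(env):
    # Hypothetical one-hot representation builder matching the
    # (state_reps, name, input_dims, _) return signature used above.
    states = list(env.useable)                      # usable grid positions
    input_dims = len(states)
    state_reps = {s: np.eye(input_dims)[i] for i, s in enumerate(states)}
    return state_reps, 'onehot', input_dims, None

class _ToyEnv:
    # stand-in for gym.make(env_name), for illustration only
    useable = [(0, 0), (0, 1), (1, 0)]

state_reps, name, input_dims, _ = onehot_sketch(_ToyEnv())
print(name, input_dims, state_reps[(0, 1)])         # onehot 3 [0. 1. 0.]
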
Example #2
import torch
import matplotlib.pyplot as plt

plt.close()
rep_types = {
    'unstructured': onehot,
    'random': random,
    'place_cell': place_cell,
    'structured': sr,
    'conv': convs
}
if rep == 'conv':
    # state dict saved from a previously trained agent
    saved_network = torch.load(
        '../../Data/agents/6a956906-c06c-47ef-aad1-3593fb9068d1.pt')

    # load agent weights into new network
    network = nets.shallow_ActorCritic(input_dims=600,
                                       hidden_dims=400,
                                       output_dims=4,
                                       lr=5e-4)
    # remap keys from the saved network's layout to shallow_ActorCritic's:
    #   'output.0.*' -> 'pol.*'    (policy head)
    #   'output.1.*' -> 'val.*'    (value head)
    #   'hidden.5.*' -> 'hidden.*' (single hidden layer)
    new_state_dict = {}
    for key in saved_network.keys():
        if key[0:6] == 'output':
            if key[7] == '0':
                new_key = 'pol' + key[8:]
                new_state_dict[new_key] = saved_network[key]
            elif key[7] == '1':
                new_key = 'val' + key[8:]
                new_state_dict[new_key] = saved_network[key]
        elif key[0:8] == 'hidden.5':
            new_key = 'hidden' + key[8:]
            new_state_dict[new_key] = saved_network[key]

    network.load_state_dict(new_state_dict)
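
To make the renaming concrete, the sketch below runs the same key-remapping logic over a dummy state dict with the key layout assumed for the saved network ('hidden.5.*', 'output.0.*', 'output.1.*') and prints the resulting keys; the tensor shapes are placeholders.

import torch

# dummy state dict with the key layout assumed for the saved network
saved = {
    'hidden.5.weight': torch.zeros(400, 600), 'hidden.5.bias': torch.zeros(400),
    'output.0.weight': torch.zeros(4, 400),   'output.0.bias': torch.zeros(4),
    'output.1.weight': torch.zeros(1, 400),   'output.1.bias': torch.zeros(1),
}

remapped = {}
for key, value in saved.items():
    if key.startswith('output.0'):
        remapped['pol' + key[8:]] = value      # policy head
    elif key.startswith('output.1'):
        remapped['val' + key[8:]] = value      # value head
    elif key.startswith('hidden.5'):
        remapped['hidden' + key[8:]] = value   # single hidden layer
print(sorted(remapped))
# ['hidden.bias', 'hidden.weight', 'pol.bias', 'pol.weight', 'val.bias', 'val.weight']
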
Example #3

import gym
import torch
import matplotlib.pyplot as plt

num_trials = 1000
num_events = 250

# make gym environment
env = gym.make(env_name)
plt.close()
cache_size_for_env = int(len(env.useable) * (cache_size / 100))
print(env.rewards)
rep_types = {'onehot': onehot, 'random': random, 'place_cell': place_cell, 'sr': sr, 'latent': latents}
state_reps, representation_name, input_dims, _ = rep_types[rep_type](env)


# load weights to head_ac network from previously learned agent
AC_head_agent = nets.shallow_ActorCritic(input_dims, 200, env.action_space.n, lr=learning_rate)


if load_from is not None:
    AC_head_agent.load_state_dict(torch.load(directory + f'agents/{load_from}.pt'))
    print(f"weights loaded from {load_from}")

memory = None  # episodic memory disabled here; to enable: Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)


run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)
print([(x, len(run.data[x])) for x in run.data.keys()])
run.record_log(env_name, representation_name, num_trials, num_events,
               dir=directory, file=write_to_file, load_from=load_from,
               extra=['randomwalk'])
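
The snippet refers to several names (env_name, rep_type, learning_rate, cache_size, load_from, directory, write_to_file) that are defined elsewhere in the original script. A minimal sketch of how they might be supplied via argparse is shown below; the flag names and default values are assumptions, not the project's actual interface.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env_name', default='gridworld:gridworld-v1')   # placeholder env id
parser.add_argument('--rep_type', default='onehot',
                    choices=['onehot', 'random', 'place_cell', 'sr', 'latent'])
parser.add_argument('--learning_rate', type=float, default=5e-4)
parser.add_argument('--cache_size', type=int, default=100)            # percent of usable states
parser.add_argument('--load_from', default=None)                      # id of agent to warm-start from
parser.add_argument('--directory', default='../../Data/')             # placeholder output directory
parser.add_argument('--write_to_file', default='results.csv')         # placeholder log file name
args = parser.parse_args()

env_name, rep_type = args.env_name, args.rep_type
learning_rate, cache_size = args.learning_rate, args.cache_size
load_from, directory, write_to_file = args.load_from, args.directory, args.write_to_file
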