def train_agent(args):
    """Train a TQC agent with image observations on the given gym environment.

    Args:
        args: parsed command-line arguments holding the training hyperparameters
            (environment name, seed, learning rates, buffer size, evaluation
            frequency, ...).
    """

    # A CNN encoder maps the [1, 3, 84, 84] image observation to a [1, 200] latent vector
    
    now = datetime.now()    
    dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    pathname = str(args.locexp) + "/" + str(args.env_name) + '_agent_' + str(args.policy)
    pathname += "_batch_size_" + str(args.batch_size) + "_lr_act_" + str(args.lr_actor) 
    pathname += "_lr_critc_" + str(args.lr_critic) + "_lr_decoder_"
    arg_text = str(args)
    write_into_file(pathname, arg_text) 
    tensorboard_name = str(args.locexp) + '/runs/' + pathname 
    writer = SummaryWriter(tensorboard_name)
    size = args.size
    env = gym.make(args.env_name, renderer='egl')
    state = env.reset()
    print("state ", state.shape)
    state_dim = 200
    print("State dim, " , state_dim)
    action_dim = 5 
    print("action_dim ", action_dim)
    max_action = 1
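    # Standard SAC/TQC heuristic: set the target entropy to -dim(action space)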
    args.target_entropy = -np.prod(action_dim)
    args.max_episode_steps = 200
    file_name = str(args.locexp) + "/pytorch_models/{}".format(args.env_name)
    obs_shape = (args.history_length, size, size)
    action_shape = (action_dim,)
    print("obs", obs_shape)
    print("act", action_shape)
    policy = TQC(state_dim, action_dim, max_action, args)    
    replay_buffer = ReplayBuffer(obs_shape, action_shape, int(args.buffer_size), args.image_pad, args.device)
    total_timesteps = 0
    timesteps_since_eval = 0
    episode_num = 0
    done = True
    t0 = time.time()
    scores_window = deque(maxlen=100) 
    episode_reward = 0
    evaluations = []
    tb_update_counter = 0
    # Evaluate the untrained policy once to get a baseline score
    evaluations.append(evaluate_policy(policy, writer, total_timesteps, args, env))
    save_model = file_name + '-{}reward_{:.2f}-agent{}'.format(episode_num, evaluations[-1], args.policy) 
    policy.save(save_model)
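    # Tracks the last 100 episodes; episodes shorter than 50 steps are counted
    # as having reached the goal (see the logging block below)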
    done_counter = deque(maxlen=100)
    while total_timesteps < args.max_timesteps:
        tb_update_counter += 1
        # If the episode is done
        if done:
            episode_num += 1
            #env.seed(random.randint(0, 100))
            scores_window.append(episode_reward)
            average_mean = np.mean(scores_window)
            if total_timesteps > args.start_timesteps and episode_num % args.update_beta_freq == 0:
                replay_buffer.save_memory(str(args.locexp))
                print("save memory", args.locexp)
                #policy.update_beta(replay_buffer, writer, total_timesteps)
            if tb_update_counter > args.tensorboard_freq:
                print("Write tensorboard")
                tb_update_counter = 0
                writer.add_scalar('Reward', episode_reward, total_timesteps)
                writer.add_scalar('Reward mean', average_mean, total_timesteps)
                writer.flush()
            # If we are not at the very beginning, we start the training process of the model
            if total_timesteps != 0:
                if episode_timesteps < 50:
                    done_counter.append(1)
                else:
                    done_counter.append(0)
                goals = sum(done_counter)
                text = "Total Timesteps: {} Episode Num: {} ".format(total_timesteps, episode_num) 
                text += "Episode steps {} ".format(episode_timesteps)
                text += "Goal last 100 ep : {} ".format(goals)
                text += "Reward: {:.2f}  Average Re: {:.2f} Time: {}".format(episode_reward, np.mean(scores_window), time_format(time.time()-t0))
                writer.add_scalar('Goal_freq', goals, total_timesteps)
                
                print(text)
                write_into_file(pathname, text)
                #policy.train(replay_buffer, writer, episode_timesteps)
            # We evaluate the episode and we save the policy
            if timesteps_since_eval >= args.eval_freq:
                timesteps_since_eval %= args.eval_freq 
                torch.manual_seed(args.seed)
                np.random.seed(args.seed)
                evaluations.append(evaluate_policy(policy, writer, total_timesteps, args, env))
                save_model = file_name + '-{}reward_{:.2f}-agent{}'.format(episode_num, evaluations[-1], args.policy) 
                policy.save(save_model)
            # Once the episode is finished, we reset the environment
            state = env.reset()
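            # Build the initial observation by stacking args.history_length frames
            # (presumably encoded through the policy's CNN inside stacked_frames)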
            obs, state_buffer = stacked_frames(state, size, args, policy)

            # Set the Done to False
            done = False
            # Set rewards and episode timesteps to zero
            episode_reward = 0
            episode_timesteps = 0
            # reset k in memory
            replay_buffer.k = 0
        # Before args.start_timesteps, we sample random actions for exploration
        if total_timesteps < args.start_timesteps:
            action = env.action_space.sample()
        else:  # After the warm-up phase, actions come from the learned policy
            action = policy.select_action(obs)
        # The agent performs the action in the environment, then reaches the next state and receives the reward
        new_obs, reward, done, _ = env.step(action)
        # print(reward)
        #frame = cv2.imshow("wi", np.array(new_obs))
        #cv2.waitKey(10)
        done = float(done)
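        # Append the new frame to the frame stack / state buffer (presumably the
        # counterpart of stacked_frames for subsequent steps)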
        new_obs, state_buffer = create_next_obs(new_obs, size, args, state_buffer, policy)
        
        # Distinguish a true termination from the time-limit truncation: done_bool
        # stays 0 at the step limit so the critic can still bootstrap from the next state
        #done_bool = 0 if episode_timesteps + 1 == env._max_episode_steps else float(done)
        done_bool = 0 if episode_timesteps + 1 == args.max_episode_steps else float(done)
        if episode_timesteps + 1 == args.max_episode_steps:
            done = True
        # Scale the reward and add it to the episode return
        reward = reward * args.reward_scalling
        episode_reward += reward
        # We store the new transition into the Experience Replay memory (ReplayBuffer)
        if args.debug:
            print("add to buffer obs ", obs.shape)
            print("add to buffer next_obs ", new_obs.shape)
        replay_buffer.add(obs, action, reward, new_obs, done, done_bool)
        # We update the state, the episode timestep, the total timesteps, and the timesteps since the evaluation of the policy
        obs = new_obs
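        # After the warm-up phase, perform args.repeat_update gradient steps per environment step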
        if total_timesteps > args.start_timesteps:
            for i in range(args.repeat_update):
                policy.train(replay_buffer, writer, 1)
        episode_timesteps += 1
        total_timesteps += 1
        timesteps_since_eval += 1


    # We add the last policy evaluation to our list of evaluations and we save our model
    evaluations.append(evaluate_policy(policy, writer, total_timesteps, args, env))
    save_model = file_name + '-{}reward_{:.2f}-agent{}'.format(episode_num, evaluations[-1], args.policy)
    policy.save(save_model)