def init(device_name, model_name):
    global device, net, last_h, last_c
    if model_name is None:
        raise ValueError("No model given!")
    device = torch.device(device_name)
    net = fc_lstm.FC_LSTM(INPUT_SHAPE, NUM_ACTIONS)
    net.train(False)  # disable training mode so the dropout layer is inactive
    net.load_state_dict(torch.load(model_name, map_location=device))
    net.to(device)
    reset_hidden()
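# reset_hidden() is referenced above but not defined in this section. A
# minimal sketch of the global variant, assuming a single-layer LSTM state
# of shape (num_layers, batch, hidden_size); HIDDEN_SIZE is a hypothetical
# constant, not taken from this repository:
def reset_hidden():
    global last_h, last_c
    last_h = torch.zeros(1, 1, HIDDEN_SIZE, device=device)  # LSTM hidden state
    last_c = torch.zeros(1, 1, HIDDEN_SIZE, device=device)  # LSTM cell state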
def __init__(self, device_name, model_name, num_observations, num_envs,
             num_threads, training_data_path):
    assert num_envs == 1
    self.device = torch.device(device_name)
    if USE_GRU:
        self.net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
    else:
        self.net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)
    self.net.train(False)  # disable training mode so the dropout layer is inactive
    self.net.to(self.device)
    if model_name is not None:
        self.net.load_state_dict(
            torch.load(model_name, map_location=self.device))
    self.reset_hidden()
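# self.reset_hidden() is likewise not shown in this section. A sketch of
# one plausible method implementation, assuming the GRU variant keeps a
# single hidden tensor while the LSTM variant keeps an (h, c) pair;
# HIDDEN_SIZE is again a hypothetical constant:
def reset_hidden(self):
    zeros = torch.zeros(1, 1, HIDDEN_SIZE, device=self.device)
    if USE_GRU:
        self.hidden = zeros                   # GRU: single hidden state
    else:
        self.hidden = (zeros, zeros.clone())  # LSTM: (hidden, cell) pair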
def __init__(self, device_name, model_name, num_observations, num_envs,
             num_threads, training_data_path):
    assert num_envs == 1
    self.episode_idx = 0
    if model_name is None:
        print("WARNING: No model given! Add '--model <model_name>' to "
              "specify a path to a model file to be loaded as initial "
              "model weights by the agent.")
    self.model_name = model_name
    self.device = torch.device(device_name)
    if USE_GRU:
        self.net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
    else:
        self.net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)
    self.net.train(False)  # disable training mode so the dropout layer is inactive
    self.net.to(self.device)
    if model_name is not None:
        self.net.load_state_dict(
            torch.load(model_name, map_location=self.device))
    self.reset_hidden()
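# Hypothetical construction of the agent this __init__ belongs to (the
# class name "EvaluationAgent" and all argument values are illustrative
# assumptions, not taken from this repository):
#
#   agent = EvaluationAgent(device_name="cpu",
#                           model_name="path/to/model_weights.dat",
#                           num_observations=NUM_OBSERVATIONS,
#                           num_envs=1,  # must be 1, see the assert above
#                           num_threads=1,
#                           training_data_path=None)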
def __init__(self, device_name, model_name, num_observations, num_envs,
             num_threads, training_data_path):
    assert num_envs == 1
    self.device = torch.device(device_name)
    self.training_data_path = training_data_path

    # create experience buffers on the GPU for faster sampling
    self.tensor_state_buffer = torch.zeros(
        MEMORY_SIZE, num_observations, dtype=torch.float).to(self.device)  # states
    # rewards with N-step returns applied:
    # buffer[t] = reward_t + discount*buffer[t+1] + ... + discount^(N-1)*buffer[t+N-1]
    self.tensor_reward_buffer = torch.zeros(
        MEMORY_SIZE, dtype=torch.float).to(self.device)
    self.tensor_action_buffer = torch.zeros(
        MEMORY_SIZE, dtype=torch.long).to(self.device)  # action that was chosen
    self.tensor_done_buffer = torch.zeros(
        MEMORY_SIZE, dtype=torch.bool).to(self.device)  # episode has ended
    self.tensor_step_buffer = torch.zeros(
        MEMORY_SIZE, dtype=torch.int16).to(self.device)  # step index within episode (starting at 0)

    # create net and target net
    if USE_GRU:
        self.net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
        self.tgt_net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
    else:
        self.net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)
        self.tgt_net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)

    # copy nets to device
    self.net.to(self.device)
    self.tgt_net.to(self.device)

    # show net to user
    print(self.net)

    self.episode_start = 0

    # load model parameters from file if given
    if model_name is not None:
        self.net.load_state_dict(
            torch.load(model_name, map_location=self.device))
        self.tgt_net.load_state_dict(self.net.state_dict())

    # initialize epsilon for the epsilon-greedy algorithm
    self.epsilon = EPSILON_START

    # create optimizer
    self.optimizer = optim.Adam(self.net.parameters(), lr=LEARNING_RATE)

    # mean rewards
    self.best_mean_reward = None
    self.mean_reward = 0

    self.new_state_discount = DISCOUNT_FACTOR ** N_STEPS

    # time metrics
    self.sampling_times = deque(maxlen=100)
    self.batch_forward_times = deque(maxlen=100)
    self.loss_calc_times = deque(maxlen=100)
    self.backward_times = deque(maxlen=100)
    self.optimize_times = deque(maxlen=100)
    self.gpu_pre_copy_times = deque(maxlen=100)
    self.gpu_copy_times = deque(maxlen=100)
    self.measure_gpu_times = False

    # training metrics
    self.mean_loss_buffer = deque(maxlen=100)
    self.mean_value_buffer = deque(maxlen=100)
    self.mean_loss = 0
    self.mean_value = 0

    # initialize frame indices
    self.frame_idx = 0
    self.last_episode_frame = 0
    self.episode_idx = self.episode_start
    self.training_done = False

    # record current time
    self.last_time = time.perf_counter()
    self.start_time = time.time()

    # reset state
    self.reset()
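# The N-step relation documented at tensor_reward_buffer above,
#   buffer[t] = reward_t + discount*buffer[t+1] + ... + discount^(N-1)*buffer[t+N-1],
# can be produced by a single backward pass over a finished episode. A
# minimal standalone sketch (the helper name and the plain-list input are
# assumptions, not code from this repository):
def fold_n_step_rewards(raw_rewards, discount, n_steps):
    """Return r with r[t] = sum of discount^k * raw_rewards[t+k] for k < n_steps,
    truncated at the end of the episode."""
    folded = list(raw_rewards)
    for t in reversed(range(len(folded) - 1)):
        # extend the return by one step ...
        folded[t] += discount * folded[t + 1]
        # ... and drop the term that now lies beyond the N-step horizon
        if t + n_steps < len(raw_rewards):
            folded[t] -= (discount ** n_steps) * raw_rewards[t + n_steps]
    return folded

# e.g. fold_n_step_rewards([1.0, 2.0, 3.0], discount=0.5, n_steps=2)
# returns [2.0, 3.5, 3.0]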