def __init__(self, env, agent, replay_buffer, arm):
    """Set up per-run state and start the first episode.

    Resets ``env`` for ``arm``, grabs the first input image, stacks it as
    the start of a new episode, and initialises the bookkeeping used for
    scoring and video capture.

    Args:
        env: Environment object; must provide ``reset(arm)``,
            ``getInputImage()``, ``calcDistance()`` and ``getVideoImage()``.
        agent: Agent object, stored as-is.
        replay_buffer: Replay buffer object, stored as-is.
        arm: Value forwarded to ``env.reset`` and stored on the instance.
    """
    self.env = env
    self.agent = agent
    self.replay_buffer = replay_buffer
    self.obs = env.reset(arm)
    self.done = False
    self.arm = arm
    self.observation_steps = 50  # 200, need to change average to match in train.py
    self.stacked_frames = 0
    self.episode = 0
    self.frames_total = 0

    # First observation of the episode.  The frame buffer passed in is
    # ``self.stacked_frames`` (the original passed the ``stack_frames``
    # function object itself, which is not a frame buffer).
    # NOTE(review): presumably ``stack_frames`` rebuilds the buffer when its
    # third argument is True, so the placeholder 0 is never read -- confirm.
    self.state = self.env.getInputImage()
    self.state, self.stacked_frames = stack_frames(self.stacked_frames, self.state, True)

    self.score = []
    self.video_array = []
    self.distance = self.env.calcDistance()
    self.solved = False
    self.index = 0

    # Record a video on every VIDEO_INTERVAL-th episode (episode 0 included).
    self.video_record = self.episode % cons.VIDEO_INTERVAL == 0
    # NOTE(review): the flattened source does not show whether this append
    # was nested under a branch of the if/else above; it is kept
    # unconditional here -- confirm.
    self.video_array.append(self.env.getVideoImage())
# Start from a clean TensorFlow graph.
tf.reset_default_graph()

# Build the network.
# NOTE(review): this rebinds the name ``DQNetwork`` from the class to the
# instance; later code presumably relies on that, so it is left unchanged.
DQNetwork = DQNetwork(state_size, action_size, learning_rate)

# Create an empty replay memory and pre-fill it by taking random actions.
memory = Memory(max_size=memory_size)

for i in range(pretrain_length):
    # Very first step: reset the environment and start a new frame stack.
    if i == 0:
        state = env.reset()
        state, stacked_frames = stack_frames(stacked_frames, state, True, stack_size)

    # Pick a uniformly random action and apply it.
    choice = random.randint(1, len(possible_actions)) - 1
    action = possible_actions[choice]
    next_state, reward, done, _ = env.step(action)
    # env.render()

    # Push the new frame onto the existing stack.
    next_state, stacked_frames = stack_frames(stacked_frames, next_state, False, stack_size)

    # Episode over: the next state is an all-zero array shaped like `state`.
    if done:
        next_state = np.zeros(state.shape)
temperature = -1
size = (512, 512)

time.sleep(2)
start_time = time.time()

# Run episodes until the total frame budget is used up.
while frames_total < cons.num_frames:
    episode += 1

    # First observation of the episode, stacked as a new episode.
    state = sim.get_input_image()
    state, stacked_frames = stack_frames(
        stacked_frames, state, True, cons.num_frames_stacked
    )

    # Per-episode bookkeeping.
    score = []
    video_array = []
    distance = sim.calc_distance()
    solved = False
    index = 0

    # Record a video on every `video_interval`-th episode.
    video_record = episode % cons.video_interval == 0
# last speaker for scoring
score_speakers = [speakers[i] for i in ids[2:]]

# ====== generate training and validating data ====== #
X_train = []
y_train = []
X_score = []
y_score = []
first_sample = None

for name in all_name:
    features = feat_data[name]
    # change the INPUT_FEATURE index to use a different feature for training
    x = features[INPUT_FEATURE]
    num_frames, num_features = x.shape

    # adding context window: left context + centre frame + right context
    x = stack_frames(x, frame_length=CONTEXT_LENGTH * 2 + 1)

    # sequencing the image; the shape is passed positionally because the
    # `newshape` keyword of np.reshape is deprecated in recent NumPy
    x = np.reshape(x, (num_frames, CONTEXT_LENGTH * 2 + 1, num_features))

    # one label per frame: the integer prefix of `name` before the first '_'
    y = [int(name.split('_')[0])] * len(x)

    # add to appropriate set
    if any(spk in name for spk in train_speakers):
        X_train.append(x)
        y_train += y
        # remember the first utterance that went into the training set
        if first_sample is None:
            first_sample = name
    else:
        X_score.append(x)
        y_score += y

# ====== merge all array into a matrix ====== #
X_train = np.concatenate(X_train, axis=0)
X_train = []
y_train = []
X_score = []
y_score = []
first_sample = None

for name in all_name:
    features = feat_data[name]
    # change the INPUT_FEATURE index to use a different feature for training
    x = features[INPUT_FEATURE]
    num_frames, num_features = x.shape

    # adding context window: CONTEXT_LENGTH frames of left context, the main
    # frame in the middle, and CONTEXT_LENGTH frames of right context.  This
    # is the same width the reshape below expects (the original left
    # `frame_length=None` as an unresolved TODO).
    x = stack_frames(x, frame_length=CONTEXT_LENGTH * 2 + 1)

    # sequencing the image; the shape is passed positionally because the
    # `newshape` keyword of np.reshape is deprecated in recent NumPy
    x = np.reshape(x, (num_frames, CONTEXT_LENGTH * 2 + 1, num_features))

    # one label per frame: the integer prefix of `name` before the first '_'
    y = [int(name.split('_')[0])] * len(x)

    # add to appropriate set
    if any(spk in name for spk in train_speakers):
        X_train.append(x)
        y_train += y
        # remember the first utterance that went into the training set
        if first_sample is None:
            first_sample = name
    else:
        X_score.append(x)
        y_score += y

# ====== merge all array into a matrix ====== #
X_train = np.concatenate(X_train, axis=0)
# def add(self, experience):
#     self.buffer.append(experience)
# def sample(self, batch_size):
#     buffer_size = len(self.buffer)
#     index = np.random.choice(np.arange(buffer_size),
#                              size = batch_size,
#                              replace = False)
#     return [self.buffer[i] for i in index]

memory = Memory(max_size=memory_size)

# Frame buffer holding `stack_size` blank 84x84 frames.  `np.int` was removed
# from NumPy (it was only an alias of the builtin), so plain `int` is used.
stacked_frames = deque(
    [np.zeros((84, 84), dtype=int) for _ in range(stack_size)],
    maxlen=stack_size,
)

# Pre-fill the replay memory with `pretrain_length` random-action transitions.
for i in range(pretrain_length):
    if i == 0:
        # NOTE(review): `is_new_episode` is set outside this excerpt;
        # presumably it is True here so the first frame starts a fresh
        # stack -- confirm.
        frame = env.reset()
        stacked_frames, stacked_state = utils.stack_frames(stacked_frames, frame, is_new_episode)

    # Take a uniformly random action and stack the resulting frame.
    action = np.random.randint(0, action_size)
    new_frame, reward, done, _ = env.step(action)
    stacked_frames, new_stacked_state = utils.stack_frames(stacked_frames, new_frame, is_new_episode)

    memory.add((stacked_state, action, reward, new_stacked_state, done))

    if done:
        # Episode finished: reset and rebuild the stack as a new episode.
        frame = env.reset()
        is_new_episode = True
        stacked_frames, stacked_state = utils.stack_frames(stacked_frames, frame, is_new_episode)
        is_new_episode = False
    else:
        # Continue the episode from the state just reached.
        stacked_state = new_stacked_state.copy()

# NOTE: the excerpt is cut off here, at the opening of a docstring:
# """ This function will do the part