Example no. 1
 def __init__(self, env, agent, replay_buffer, arm):
     self.env = env
     self.agent = agent
     self.replay_buffer = replay_buffer
     self.obs = env.reset(arm)
     self.done = False
     self.arm = arm
     self.observation_steps = 50  # alternatively 200; the averaging window in train.py must match
     self.stacked_frames = 0
     self.episode = 0
     self.video_record = True
     self.frames_total = 0
     self.state = self.env.getInputImage()
     self.state, self.stacked_frames = stack_frames(self.stacked_frames,
                                                    self.state, True)
     self.score = []
     self.video_array = []
     self.distance = self.env.calcDistance()
     self.solved = False
     self.index = 0
     if self.episode % cons.VIDEO_INTERVAL == 0:
         self.video_record = True
     else:
         self.video_record = False
     self.video_array.append(self.env.getVideoImage())
Example no. 2
import random
from collections import deque

import numpy as np
import tensorflow as tf

# Reset the graph
tf.reset_default_graph()

# Instantiate the DQNetwork
# (named dq_network so the instance does not shadow the class)
dq_network = DQNetwork(state_size, action_size, learning_rate)

# Instantiate an empty replay memory
memory = Memory(max_size=memory_size)

# Initialise the frame stack (re-created on every new episode)
stacked_frames = deque([np.zeros((84, 84), dtype=int) for _ in range(stack_size)],
                       maxlen=stack_size)
for i in range(pretrain_length):
    # If it's the first step
    if i == 0:
        state = env.reset()

        state, stacked_frames = stack_frames(stacked_frames, state, True, stack_size)

    # Take a random action and observe next_state, reward, and done
    choice = random.randrange(len(possible_actions))
    action = possible_actions[choice]
    next_state, reward, done, _ = env.step(action)

    # env.render()

    # Stack the frames
    next_state, stacked_frames = stack_frames(stacked_frames, next_state, False, stack_size)

    # If the episode is finished (we lost all three lives)
    if done:
        # We finished the episode
        next_state = np.zeros(state.shape)
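
None of the snippets in this listing shows stack_frames itself. For the Atari-style examples (nos. 1, 2, 3 and 6), a minimal sketch of the deque-based helper implied by their call signatures could look like the following; the stack_size=4 default and the assumption that the incoming frame is already preprocessed to a fixed-size 2-D array are mine, not the original authors':

from collections import deque

import numpy as np


def stack_frames(stacked_frames, frame, is_new_episode, stack_size=4):
    """Keep a rolling stack of the last `stack_size` frames.

    Returns the stacked state (H x W x stack_size) and the updated deque.
    """
    if is_new_episode:
        # Fresh episode: fill the whole stack with copies of the first
        # frame so the state has full depth from the very first step.
        stacked_frames = deque([frame.copy() for _ in range(stack_size)],
                               maxlen=stack_size)
    else:
        # Append the newest frame; the oldest one drops off automatically.
        stacked_frames.append(frame)
    stacked_state = np.stack(stacked_frames, axis=2)
    return stacked_state, stacked_frames

Example no. 6 unpacks the two return values in the opposite order (deque first, state second), so its utils.stack_frames is presumably a small variant of the same idea.
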
Example no. 3
    temperature = -1
    size = (512, 512)

    time.sleep(2)
    start_time = time.time()

    while frames_total < cons.num_frames:
        episode += 1
        state = sim.get_input_image()
        state, stacked_frames = stack_frames(stacked_frames, state, True,
                                             cons.num_frames_stacked)
        score = []
        video_array = []
        distance = sim.calc_distance()
        solved = False
        index = 0
        if episode % cons.video_interval == 0:
            video_record = True
        else:
            video_record = False
Example no. 4
# last speaker for scoring
score_speakers = [speakers[i] for i in ids[2:]]
# ====== generate training and validating data ====== #
X_train = []
y_train = []
X_score = []
y_score = []
first_sample = None
for name in all_name:
    features = feat_data[name]
    # change the INPUT_FEATURE index to
    # use a different feature for training
    x = features[INPUT_FEATURE]
    num_frames, num_features = x.shape
    # adding context window
    x = stack_frames(x, frame_length=CONTEXT_LENGTH * 2 + 1)
    # reshape back to (num_frames, window, num_features)
    x = np.reshape(x,
                   newshape=(num_frames, CONTEXT_LENGTH * 2 + 1, num_features))
    y = [int(name.split('_')[0])] * len(x)
    # add to appropriate set
    if any(spk in name for spk in train_speakers):
        X_train.append(x)
        y_train += y
        if first_sample is None:
            first_sample = name
    else:
        X_score.append(x)
        y_score += y
# ====== merge all array into a matrix ====== #
X_train = np.concatenate(X_train, axis=0)
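
Examples nos. 4 and 5 call a different stack_frames: a feature-processing variant that attaches a symmetric context window to every row of a 2-D feature matrix. A minimal sketch consistent with the reshape that follows the call (the edge-padding strategy is an assumption):

import numpy as np


def stack_frames(x, frame_length):
    """Attach a `frame_length`-frame context window to each frame.

    x: (num_frames, num_features) feature matrix.
    Returns (num_frames, frame_length * num_features), which the caller
    reshapes back to (num_frames, frame_length, num_features).
    """
    context = frame_length // 2
    # Repeat the edge frames so the boundary windows stay full length.
    padded = np.pad(x, ((context, context), (0, 0)), mode='edge')
    return np.asarray([padded[i:i + frame_length].ravel()
                       for i in range(len(x))])

With CONTEXT_LENGTH frames of left context and CONTEXT_LENGTH frames of right context around the centre frame, frame_length works out to CONTEXT_LENGTH * 2 + 1, the value passed explicitly in Example no. 4.
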
Example no. 5
X_train = []
y_train = []
X_score = []
y_score = []
first_sample = None
for name in all_name:
    features = feat_data[name]
    # change the INPUT_FEATURE index to
    # use a different feature for training
    x = features[INPUT_FEATURE]
    num_frames, num_features = x.shape
    # adding context window
    # TODO: replace `None` with appropriate `frame_length` based on
    # `CONTEXT_LENGTH`, remember we have left context, right context
    # and main frame in the middle
    x = stack_frames(x, frame_length=None)
    # reshape back to (num_frames, window, num_features)
    x = np.reshape(x,
                   newshape=(num_frames, CONTEXT_LENGTH * 2 + 1, num_features))
    y = [int(name.split('_')[0])] * len(x)
    # add to appropriate set
    if any(spk in name for spk in train_speakers):
        X_train.append(x)
        y_train += y
        if first_sample is None:
            first_sample = name
    else:
        X_score.append(x)
        y_score += y
# ====== merge all array into a matrix ====== #
X_train = np.concatenate(X_train, axis=0)
Example no. 6
#     def add(self, experience):
#         self.buffer.append(experience)
#
#     def sample(self, batch_size):
#         buffer_size = len(self.buffer)
#         index = np.random.choice(np.arange(buffer_size),
#                                  size=batch_size,
#                                  replace=False)
#         return [self.buffer[i] for i in index]

from collections import deque

import numpy as np

memory = Memory(max_size=memory_size)
stacked_frames = deque([np.zeros((84, 84), dtype=int) for _ in range(stack_size)],
                       maxlen=stack_size)
is_new_episode = True  # the very first frame starts a new episode
for i in range(pretrain_length):
    if i == 0:
        frame = env.reset()
        stacked_frames, stacked_state = utils.stack_frames(stacked_frames, frame,
                                                           is_new_episode)
        is_new_episode = False
    # Pre-fill the replay memory with random transitions
    action = np.random.randint(0, action_size)
    new_frame, reward, done, _ = env.step(action)
    stacked_frames, new_stacked_state = utils.stack_frames(stacked_frames, new_frame,
                                                           is_new_episode)
    memory.add((stacked_state, action, reward, new_stacked_state, done))
    if done:
        # Episode over: reset the environment and restart the frame stack
        frame = env.reset()
        is_new_episode = True
        stacked_frames, stacked_state = utils.stack_frames(stacked_frames, frame,
                                                           is_new_episode)
        is_new_episode = False
    else:
        stacked_state = new_stacked_state.copy()


"""
This function will do the part