class Face(object):
    def __init__(self, geoinfo, speech=""):
        self.geoinfo = geoinfo
        self.is_smiling = False
        self.speech = speech
        self.smile_sequence = []
        self.omoroi_data = OmoroiData()
        self.graph = Graph(ylim=[self.omoroi_data.omoroi_min - 1.0,
                                 self.omoroi_data.omoroi_max + 1.0],
                           ylabel="Omorosa", scale=80, figsize=(2, 2))
        self.face_images = FaceImageArray()
        self.mouth_images = MouthImageArray()

    def update(self):
        # Advance the omoroi sequence with the current smile state, then plot
        # the most recent subsequence next to the detected face region.
        self.omoroi_data.update_omoroi_sequence(self.is_smiling)
        length = 20
        omoroi_subsequence = self.omoroi_data.get_subsequence(
            self.omoroi_data.omoroi_sequence, length)
        pos = (self.geoinfo.coordinates[0][0] + self.geoinfo.length[0],
               self.geoinfo.coordinates[0][1] - self.geoinfo.length[1] / 2)
        self.graph.set_graph_data(np.arange(len(omoroi_subsequence)),
                                  omoroi_subsequence, pos=pos)

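# Minimal usage sketch for Face.update(). GeoInfoStub is a hypothetical
# stand-in, not the project's real geoinfo class; it only mimics the two
# attributes Face actually reads (.coordinates, a list of (x, y) points, and
# .length, a (width, height) pair). It also assumes the project's OmoroiData,
# Graph, FaceImageArray and MouthImageArray classes are importable.
from collections import namedtuple

GeoInfoStub = namedtuple('GeoInfoStub', ['coordinates', 'length'])

face = Face(GeoInfoStub(coordinates=[(120, 80)], length=(64, 64)), speech="hello")
face.is_smiling = True
face.update()  # pushes the smile state into OmoroiData and repositions the graph
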
def train():
    global env_name, save_name, agent_args
    env = gym.make(env_name)
    if env_name == 'DobroHalfCheetah-v0':
        env.unwrapped.initialize(is_render=False)
    agent = Agent(env, agent_args)
    v_loss_logger = Logger(save_name, 'v_loss')
    p_loss_logger = Logger(save_name, 'p_loss')
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name.upper(), agent.name)

    episodes = int(5e5)
    save_freq = 1
    save_period = 1000
    p_losses = deque(maxlen=save_period)
    v_losses = deque(maxlen=save_period)
    entropies = deque(maxlen=save_period)
    scores = deque(maxlen=save_period)

    for episode in range(episodes):
        state = env.reset()
        agent.actor_noise.reset()
        done = False
        score = 0
        step = 0
        while not done:
            step += 1
            action = agent.get_action(state, True)
            next_state, reward, done, info = env.step(action)
            agent.replay_memory.append([
                np.array(state, np.float32), action, reward, done,
                np.array(next_state, np.float32)
            ])

            if len(agent.replay_memory) > agent.train_start:
                v_loss, p_loss = agent.train()
                v_loss_logger.write([1, v_loss])
                p_loss_logger.write([1, p_loss])
                p_losses.append(p_loss)
                v_losses.append(v_loss)
                value = agent.get_value(state, action)
                entropies.append(value)
                scores.append(reward)
                graph.update(np.mean(scores), np.mean(p_losses),
                             np.mean(v_losses), np.mean(entropies))

            state = next_state
            score += reward

        print(episode, score, agent.epsilon)
        score_logger.write([step, score])
        if (episode + 1) % save_freq == 0:
            agent.save()
            v_loss_logger.save()
            p_loss_logger.save()
            score_logger.save()

    graph.update(0, 0, 0, 0, finished=True)

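# Hypothetical launch sketch: train() reads env_name, save_name and agent_args
# from module-level globals, so a caller has to define them first. The values
# below are placeholders, not the repository's actual configuration.
if __name__ == '__main__':
    env_name = 'DobroHalfCheetah-v0'    # environment id referenced inside train()
    save_name = env_name.split('-')[0]  # naming convention used elsewhere in these scripts
    agent_args = {}                     # hyperparameter dict expected by Agent (contents assumed)
    train()
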
def train():
    global env_name, save_name, agent_args
    env = gym.make(env_name)
    agent = Agent(env, agent_args)
    v_loss_logger = Logger(save_name, 'v_loss')
    cost_v_loss_logger = Logger(save_name, 'cost_v_loss')
    kl_logger = Logger(save_name, 'kl')
    score_logger = Logger(save_name, 'score')
    cost_logger = Logger(save_name, 'cost')
    graph = Graph(1000, save_name,
                  ['score', 'cost', 'value loss', 'cost value loss', 'kl divergence'])

    max_steps = 4000
    max_ep_len = 1000
    episodes = int(max_steps / max_ep_len)
    epochs = 500
    save_freq = 10
    log_length = 10
    p_objectives = deque(maxlen=log_length)
    c_objectives = deque(maxlen=log_length)
    v_losses = deque(maxlen=log_length)
    cost_v_losses = deque(maxlen=log_length)
    kl_divergence = deque(maxlen=log_length)
    scores = deque(maxlen=log_length * episodes)
    costs = deque(maxlen=log_length * episodes)

    for epoch in range(epochs):
        states = []
        actions = []
        targets = []
        cost_targets = []
        gaes = []
        cost_gaes = []
        avg_costs = []
        ep_step = 0
        while ep_step < max_steps:
            state = env.reset()
            done = False
            score = 0
            cost = 0
            step = 0
            temp_rewards = []
            temp_costs = []
            values = []
            cost_values = []
            while True:
                step += 1
                ep_step += 1
                assert env.observation_space.contains(state)
                action, clipped_action, value, cost_value = agent.get_action(state, True)
                assert env.action_space.contains(clipped_action)
                next_state, reward, done, info = env.step(clipped_action)

                # predicted cost from the distance to the nearest hazard
                h_dist = hazard_dist(env.hazards_pos, env.world.robot_pos())
                predict_cost = get_cost(h_dist)

                states.append(state)
                actions.append(action)
                temp_rewards.append(reward)
                temp_costs.append(predict_cost)
                values.append(value)
                cost_values.append(cost_value)

                state = next_state
                score += reward
                # Log the environment's real cost so it can be compared with
                # the discrete (predicted) cost later.
                cost += info.get('cost', 0)
                if done or step >= max_ep_len:
                    break

            if step >= max_ep_len:
                action, clipped_action, value, cost_value = agent.get_action(state, True)
            else:
                value = 0
                cost_value = 0
                print("done before max_ep_len...")

            next_values = values[1:] + [value]
            temp_gaes, temp_targets = agent.get_gaes_targets(temp_rewards, values, next_values)
            next_cost_values = cost_values[1:] + [cost_value]
            temp_cost_gaes, temp_cost_targets = agent.get_gaes_targets(
                temp_costs, cost_values, next_cost_values)

            avg_costs.append(np.mean(temp_costs))
            targets += list(temp_targets)
            gaes += list(temp_gaes)
            cost_targets += list(temp_cost_targets)
            cost_gaes += list(temp_cost_gaes)
            score_logger.write([step, score])
            cost_logger.write([step, cost])
            scores.append(score)
            costs.append(cost)

        trajs = [states, actions, targets, cost_targets, gaes, cost_gaes, avg_costs]
        v_loss, cost_v_loss, p_objective, cost_objective, kl = agent.train(trajs)
        v_loss_logger.write([ep_step, v_loss])
        cost_v_loss_logger.write([ep_step, cost_v_loss])
        kl_logger.write([ep_step, kl])
        p_objectives.append(p_objective)
        c_objectives.append(cost_objective)
        v_losses.append(v_loss)
        cost_v_losses.append(cost_v_loss)
        kl_divergence.append(kl)
        print(np.mean(scores), np.mean(costs), np.mean(v_losses),
              np.mean(cost_v_losses), np.mean(kl_divergence), np.mean(c_objectives))
        graph.update([np.mean(scores), np.mean(costs), np.mean(v_losses),
                      np.mean(cost_v_losses), np.mean(kl_divergence)])
        if (epoch + 1) % save_freq == 0:
            agent.save()
            v_loss_logger.save()
            cost_v_loss_logger.save()
            kl_logger.save()
            score_logger.save()
            cost_logger.save()

    graph.update(None, finished=True)

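# For reference, a generic GAE computation of the kind agent.get_gaes_targets()
# presumably performs for both the reward and the cost streams above. This is a
# sketch under assumed gamma/lam values, not the agent's actual implementation.
import numpy as np

def gaes_and_targets(rewards, values, next_values, gamma=0.99, lam=0.95):
    # one-step TD residuals
    deltas = np.array(rewards) + gamma * np.array(next_values) - np.array(values)
    gaes = np.zeros_like(deltas)
    running = 0.0
    for t in reversed(range(len(deltas))):
        running = deltas[t] + gamma * lam * running
        gaes[t] = running
    # value targets are advantages plus the baseline
    targets = gaes + np.array(values)
    return gaes, targets
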
def train():
    global env_name, save_name, agent_args
    env = gym.make(env_name)
    agent = Agent(env, agent_args)
    p_loss_logger = Logger(save_name, 'p_loss')
    v_loss_logger = Logger(save_name, 'v_loss')
    kl_logger = Logger(save_name, 'kl')
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name,
                  ['score', 'policy loss', 'value loss', 'kl divergence', 'entropy'])

    episodes = 10
    max_steps = 4000
    max_ep_len = min(1000, env.spec.max_episode_steps)
    epochs = int(1e5)
    save_freq = 10
    save_period = 10
    p_losses = deque(maxlen=save_period)
    v_losses = deque(maxlen=save_period)
    kl_divergence = deque(maxlen=save_period)
    entropies = deque(maxlen=save_period)
    scores = deque(maxlen=save_period * episodes)

    for epoch in range(epochs):
        states = []
        actions = []
        targets = []
        next_states = []
        rewards = []
        gaes = []
        ep_step = 0
        #for episode in range(episodes):
        while ep_step < max_steps:
            state = env.reset()
            done = False
            score = 0
            step = 0
            temp_rewards = []
            values = []
            while True:
                step += 1
                ep_step += 1
                action, clipped_action, value = agent.get_action(state, True)
                next_state, reward, done, info = env.step(clipped_action)
                states.append(state)
                actions.append(action)
                temp_rewards.append(reward)
                next_states.append(next_state)
                rewards.append(reward)
                values.append(value)
                state = next_state
                score += reward
                if done or step >= max_ep_len:
                    break

            if step >= max_ep_len:
                action, clipped_action, value = agent.get_action(state, True)
            else:
                # The episode terminated early (the agent died before reaching
                # max_ep_len), so bootstrap with value = 0.
                value = 0
                print("done before max_ep_len...")

            next_values = values[1:] + [value]
            temp_gaes, temp_targets = agent.get_gaes_targets(temp_rewards, values, next_values)
            targets += list(temp_targets)
            gaes += list(temp_gaes)
            score_logger.write([step, score])
            scores.append(score)

        trajs = [states, actions, targets, next_states, rewards, gaes]
        p_loss, v_loss, kl, entropy = agent.train(trajs)
        p_loss_logger.write([ep_step, p_loss])
        v_loss_logger.write([ep_step, v_loss])
        kl_logger.write([ep_step, kl])
        p_losses.append(p_loss)
        v_losses.append(v_loss)
        kl_divergence.append(kl)
        entropies.append(entropy)
        print(np.mean(scores), np.mean(p_losses), np.mean(v_losses),
              np.mean(kl_divergence), np.mean(entropies))
        graph.update([np.mean(scores), np.mean(p_losses), np.mean(v_losses),
                      np.mean(kl_divergence), np.mean(entropies)])
        if (epoch + 1) % save_freq == 0:
            agent.save()
            p_loss_logger.save()
            v_loss_logger.save()
            kl_logger.save()
            score_logger.save()

    graph.update(None, finished=True)

import cv2

capture = cv2.VideoCapture(0)
face_recognizer = FaceRecognizer(capture=capture)
speech_recognizer = SpeechRecognizer()
speech_recognizer.start()

w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

graph_drawer = GraphDrawer()
graph_drawer.start()
all_omorosa = OmoroiData()
all_graph = Graph(color=(1.0, 0.0, 1.0),
                  ylim=[all_omorosa.omoroi_min - 1.0, all_omorosa.omoroi_max + 1.0],
                  ylabel="Omorosa")

#if os.path.exists('movie.avi'):
#    os.remove('movie.avi')
#out = cv2.VideoWriter('movie.avi', fourcc, 7.5, (w, h))

count = 0
while True:
    # fetch a frame from the video stream
    speech = speech_recognizer.get_speech()
    recogflg = speech_recognizer.recogflg
    print("uwaaaaaaaaa----- " + str(recogflg))
    # receive the cropped image in `frame`

def train():
    global env_name, save_name, agent_args
    env = gym.make(env_name)
    env.unwrapped.initialize(is_render=False)
    agent = Agent(env, agent_args)
    v_loss_logger = Logger(save_name, 'v_loss')
    p_loss_logger = Logger(save_name, 'p_loss')
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name.upper(), agent.name)

    episodes = 10
    epochs = int(1e5)
    save_freq = 10
    save_period = 100
    p_losses = deque(maxlen=save_period)
    v_losses = deque(maxlen=save_period)
    entropies = deque(maxlen=save_period)
    scores = deque(maxlen=save_period * episodes)

    for epoch in range(epochs):
        states = []
        actions = []
        targets = []
        ep_step = 0
        for episode in range(episodes):
            state = env.reset()
            done = False
            score = 0
            step = 0
            temp_rewards = []
            while not done:
                step += 1
                ep_step += 1
                action, clipped_action = agent.get_action(state, True)
                next_state, reward, done, info = env.step(clipped_action)
                states.append(state)
                actions.append(action)
                temp_rewards.append(reward)
                state = next_state
                score += reward
            score_logger.write([step, score])
            scores.append(score)

            temp_targets = np.zeros_like(temp_rewards)
            ret = 0
            for t in reversed(range(len(temp_rewards))):
                ret = temp_rewards[t] + agent.discount_factor * ret
                temp_targets[t] = ret
            targets += list(temp_targets)

        trajs = [states, actions, targets]
        v_loss, p_objective, kl = agent.train(trajs)
        v_loss_logger.write([ep_step, v_loss])
        p_loss_logger.write([ep_step, p_objective])
        p_losses.append(p_objective)
        v_losses.append(v_loss)
        entropies.append(kl)
        #print(v_loss, p_objective, kl)
        print(np.mean(scores), np.mean(p_losses), np.mean(v_losses), np.mean(entropies))
        graph.update(np.mean(scores), np.mean(p_losses),
                     np.mean(v_losses), np.mean(entropies))
        if (epoch + 1) % save_freq == 0:
            agent.save()
            v_loss_logger.save()
            p_loss_logger.save()
            score_logger.save()

    graph.update(0, 0, 0, 0, finished=True)

def train():
    global env_name, save_name, agent_args
    env = gym.make(env_name)
    agent = Agent(env, agent_args)
    v_loss_logger = Logger(save_name, 'v_loss')
    kl_logger = Logger(save_name, 'kl')
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name,
                  ['score', 'policy objective', 'value loss', 'kl divergence'])

    episodes = 10
    epochs = int(1e5)
    save_freq = 10
    save_period = 10
    p_objectives = deque(maxlen=save_period)
    v_losses = deque(maxlen=save_period)
    kl_divergence = deque(maxlen=save_period)
    scores = deque(maxlen=save_period * episodes)

    for epoch in range(epochs):
        states = []
        actions = []
        targets = []
        next_states = []
        rewards = []
        gaes = []
        ep_step = 0
        for episode in range(episodes):
            state = env.reset()
            done = False
            score = 0
            step = 0
            temp_rewards = []
            values = []
            while not done:
                step += 1
                ep_step += 1
                action, clipped_action, value = agent.get_action(state, True)
                if clipped_action > 0:
                    a_t = 1
                else:
                    a_t = 0
                next_state, reward, done, info = env.step(a_t)
                states.append(state)
                actions.append(action)
                temp_rewards.append(reward)
                next_states.append(next_state)
                rewards.append(reward)
                values.append(value)
                state = next_state
                score += reward
            score_logger.write([step, score])
            scores.append(score)

            action, clipped_action, value = agent.get_action(state, True)
            next_values = values[1:] + [value]
            temp_gaes, temp_targets = agent.get_gaes_targets(temp_rewards, values, next_values)
            targets += list(temp_targets)
            gaes += list(temp_gaes)

        trajs = [states, actions, targets, next_states, rewards, gaes]
        v_loss, p_objective, kl = agent.train(trajs)
        v_loss_logger.write([ep_step, v_loss])
        kl_logger.write([ep_step, kl])
        p_objectives.append(p_objective)
        v_losses.append(v_loss)
        kl_divergence.append(kl)
        print(np.mean(scores), np.mean(p_objectives),
              np.mean(v_losses), np.mean(kl_divergence))
        graph.update([np.mean(scores), np.mean(p_objectives),
                      np.mean(v_losses), np.mean(kl_divergence)])
        if (epoch + 1) % save_freq == 0:
            agent.save()
            v_loss_logger.save()
            kl_logger.save()
            score_logger.save()

    graph.update(None, finished=True)

def train():
    global total_step, total_max_step, env_name, global_agent, step_period, gamma, \
        loss_logger, score_logger, graph

    gamma = 0.99
    num_thread = 10
    total_step = 0
    total_max_step = 1e6
    step_period = 1e3
    step_period = int(step_period / num_thread)
    save_name = env_name.split('-')[0]

    env = gym.make(env_name)
    global_agent = Agent("global", env, save_name, gamma)
    loss_logger = Logger(save_name, 'loss')
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name.upper(), 'A3C')
    env.close()

    def thread_func(t_idx):
        global total_step, total_max_step, env_name, global_agent, step_period, gamma, \
            loss_logger, score_logger, graph
        env = gym.make(env_name)
        agent = Agent("local_{}".format(t_idx), env, save_name, gamma)
        step = 0
        episode = 0
        while total_step < total_max_step:
            episode += 1
            # gradient reset & parameter synchronize
            agent.update_parameter(global_agent)
            start_step = step
            states = []
            actions = []
            rewards = []
            score = 0
            cnt = 0
            state = env.reset()
            while True:
                cnt += 1
                step += 1
                total_step += 1
                action = agent.get_action(state, True)
                next_state, reward, done, info = env.step(action)
                ####### modify reward function #######
                #reward = 200-cnt if done else 0
                reward += 10
                ####### modify reward function #######
                states.append(state)
                actions.append(action)
                rewards.append(reward)
                score += reward
                if done or step - start_step == step_period:
                    ret = 0 if done else agent.get_value(next_state)
                    targets = []
                    for i in range(len(states)):
                        ret = rewards[-i - 1] + gamma * ret
                        targets.append(ret)
                    targets = targets[::-1]
                    p_grad, p_loss, v_grad, v_loss, entropy = agent.calc_gradient(
                        states, actions, targets)
                    global_agent.update_with_gradients(p_grad, v_grad)
                    #loss_logger.write([step-start_step,p_loss,v_loss])
                    if done:
                        break
                    agent.update_parameter(global_agent)
                    start_step = step
                    states = []
                    actions = []
                    rewards = []
                state = next_state

            #score_logger.write([cnt, score])
            if t_idx == 0:
                print(score)
                graph.update(score, p_loss, v_loss, entropy)
            if episode % 100 == 0:
                global_agent.save()

    threads = []
    for i in range(num_thread):
        threads.append(threading.Thread(target=thread_func, args=(i,)))
        threads[-1].start()
    for thread in threads:
        thread.join()
    graph.update(0, 0, 0, 0, True)

def train():
    global env_name, save_name, agent_args
    env = gym.make(env_name)
    agent = Agent(env, agent_args)
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name, ['score', 'policy loss', 'Q value loss', 'entropy'])

    max_steps = 4000
    max_ep_len = min(1000, env.spec.max_episode_steps)
    start_training_after_steps = 1000
    step_per_training = 50
    epochs = 1000
    save_freq = 1
    record_length = 10
    p_losses = deque(maxlen=record_length * int(max_ep_len / step_per_training))
    q_losses = deque(maxlen=record_length * int(max_ep_len / step_per_training))
    entropies = deque(maxlen=record_length * int(max_ep_len / step_per_training))
    scores = deque(maxlen=record_length)

    total_step = 0
    for epoch in range(epochs):
        ep_step = 0
        while ep_step < max_steps:
            state = env.reset()
            score = 0
            step = 0
            while True:
                step += 1
                ep_step += 1
                total_step += 1
                action = agent.get_action(state, True)
                next_state, reward, done, info = env.step(action)
                done = False if step >= max_ep_len else done
                # use the builtin float for the done flag (np.float is
                # deprecated/removed in recent NumPy releases)
                agent.replay_memory.append(
                    [state, action, reward, float(done), next_state])

                if len(agent.replay_memory) > start_training_after_steps and \
                        (total_step + 1) % step_per_training == 0:
                    for _ in range(step_per_training):
                        p_loss, q_loss, entropy = agent.train()
                        p_losses.append(p_loss)
                        q_losses.append(q_loss)
                        entropies.append(entropy)
                    print(np.mean(scores), np.mean(p_losses),
                          np.mean(q_losses), np.mean(entropies))

                state = next_state
                score += reward
                if done or step >= max_ep_len:
                    break

            score_logger.write([step, score])
            scores.append(score)
            graph.update([np.mean(scores), np.mean(p_losses),
                          np.mean(q_losses), np.mean(entropies)])

        if (epoch + 1) % save_freq == 0:
            agent.save()
            score_logger.save()

    graph.update(None, finished=True)

agent_args = {  # dict name inferred from the Agent(...) call below
    'v_lr': 1e-3,
    'p_lr': 1e-4,
    'init_std': 0.0
}
gamma = 0.9
save_name = env_name.split('-')[0]
episodes = 1
value_epoch = 10
policy_epoch = 1
iters = int(1e4)

env = gym.make(env_name)
agent = Agent("a2c", env, save_name, gamma, agent_args)
loss_logger = Logger(save_name, 'loss')
score_logger = Logger(save_name, 'score')
graph = Graph(1000, save_name.upper(), 'A2C')

print_period = 100
p_losses = deque(maxlen=print_period)
v_losses = deque(maxlen=print_period)
value_list = deque(maxlen=print_period)
entropies = deque(maxlen=print_period)
scores = deque(maxlen=episodes * print_period)

for total_iter in range(iters):
    states = []
    actions = []
    rewards = []
    dones = []
    next_states = []
    for episode in range(episodes):

def train():
    global total_step, total_max_step, env_name, global_agent, step_period, gamma, \
        loss_logger, score_logger, graph, p_losses, v_losses, entropies, scores

    gamma = 0.99
    num_thread = 10
    total_step = 0
    total_max_step = 1e7
    step_period = 1e4  #1e4
    step_period = int(step_period / num_thread)
    save_name = env_name.split('-')[0]

    env = gym.make(env_name)
    env.unwrapped.initialize(is_render=False)
    global_agent = Agent("global", env, save_name, gamma)
    loss_logger = Logger(save_name, 'loss')
    score_logger = Logger(save_name, 'score')
    graph = Graph(1000, save_name.upper(), 'A3C')
    env.close()

    p_losses = deque(maxlen=step_period)
    v_losses = deque(maxlen=step_period)
    entropies = deque(maxlen=step_period)
    scores = deque(maxlen=step_period)

    def thread_func(t_idx):
        global total_step, total_max_step, env_name, global_agent, step_period, gamma, \
            loss_logger, score_logger, graph, p_losses, v_losses, entropies, scores
        env = gym.make(env_name)
        env.unwrapped.initialize(is_render=False)
        agent = Agent("local_{}".format(t_idx), env, save_name, gamma)
        episode = 0
        step = 0
        p_loss = None
        v_loss = None
        entropy = None

        # gradient reset & parameter synchronize
        agent.update_parameter(global_agent)
        start_step = step
        states = []
        actions = []
        rewards = []
        dones = []
        score = 0
        state = env.reset()
        while total_step < total_max_step:
            step += 1
            total_step += 1
            action = agent.get_action(state, True)
            #if action[0] > 0:
            #    a_t = 1
            #else:
            #    a_t = 0
            next_state, reward, done, info = env.step(action)
            #next_state, reward, done, info = env.step(a_t)
            ####### modify reward function #######
            #reward = 200-cnt if done else 0
            #reward /= 10
            ####### modify reward function #######
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            dones.append(done)
            score += reward

            if step - start_step == step_period:
                ret = 0 if done else agent.get_value(next_state)
                targets = []
                for i in range(len(states)):
                    if dones[-i - 1]:
                        ret = 0
                    #elif i > 0:
                    #    ret = agent.get_value(states[-i])
                    ret = rewards[-i - 1] + gamma * ret
                    targets.append(ret)
                targets = targets[::-1]
                p_grad, p_loss, v_grad, v_loss, entropy = agent.calc_gradient(
                    states, actions, targets)
                p_losses.append(p_loss)
                v_losses.append(v_loss)
                entropies.append(entropy)
                global_agent.update_with_gradients(p_grad, v_grad)
                #loss_logger.write([step-start_step,p_loss,v_loss])
                agent.update_parameter(global_agent)
                if t_idx == 0:
                    graph.update(np.mean(scores), np.mean(p_losses),
                                 np.mean(v_losses), np.mean(entropies))
                start_step = step
                states = []
                actions = []
                rewards = []
                dones = []

            state = next_state
            #score_logger.write([cnt, score])
            if done:
                episode += 1
                if t_idx == 0 and episode % 10 == 0:
                    global_agent.save()
                scores.append(score)
                print(t_idx, score)
                score = 0
                state = env.reset()

    threads = []
    for i in range(num_thread):
        threads.append(threading.Thread(target=thread_func, args=(i,)))
        threads[-1].start()
    for thread in threads:
        thread.join()
    graph.update(0, 0, 0, 0, True)