import pickle
import numpy as np
import matplotlib.pyplot as plt

from Normal import exploration
from Normal import measure_length
from Normal import moving_average

# evaluation_episode_number and average_over are assumed to be defined in the
# shared experiment configuration used by the other scripts.

# load training and evaluation data
double_without = pickle.load(open("double_without_record.p", "rb"))
double_record = pickle.load(open("double_record.p", "rb"))
double_evaluation_record_without = pickle.load(open("double_evaluation_without_record.p", "rb"))
double_evaluation_record = pickle.load(open("double_evaluation_record.p", "rb"))

# average evaluation reward over the evaluation episodes
evalu_without_ave = sum(double_evaluation_record_without) / evaluation_episode_number
evalu_ave = sum(double_evaluation_record) / evaluation_episode_number

# plot training results


def moving_average(x, w):
    # width-w running mean; shadows the helper imported from Normal
    return np.convolve(x, np.ones(w), 'valid') / w


double_without_average = moving_average(double_without, average_over)
double_record_average = moving_average(double_record, average_over)

fig = plt.figure(figsize=(13, 7))
env_standard = 800  # reference reward level drawn as a dotted horizontal line
x = range(len(double_record_average))
plt.plot(x, double_without_average,
         label='Normal Training\nEvaluation %s' % evalu_without_ave,
         color='black', linestyle='-.')
plt.plot(x, double_record_average,
         label='Coached by PID Controller\nEvaluation %s' % evalu_ave,
         color='magenta')
plt.xlabel('Episode Number', fontsize='large')
plt.ylabel('Episode Reward', fontsize='large')
plt.legend(loc='upper left', ncol=1, borderaxespad=0, prop={'size': 18})
plt.axhline(y=env_standard, color='black', linestyle='dotted')
plt.savefig('double.png')
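# Illustrative note (not part of the original scripts): moving_average smooths a
# reward series with a width-w running mean via np.convolve in 'valid' mode, so
# the output is w - 1 points shorter than the input. Both curves above are
# smoothed with the same average_over, which is why one x = range(len(...))
# serves for both plots. A minimal, self-contained sketch:
import numpy as np


def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w


demo_rewards = np.arange(10, dtype=float)          # 10 raw episode rewards
smoothed = moving_average(demo_rewards, 4)         # smoothing window of 4
assert len(smoothed) == len(demo_rewards) - 4 + 1  # 7 points remain
print(smoothed)  # [1.5 2.5 3.5 4.5 5.5 6.5 7.5]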
            # coach intervenes: execute the coached joint actions instead of the
            # agent's own and replace the reward with a fixed -1 penalty for
            # crossing the prohibitive boundary
            actions = [thigh_actions, leg_actions, foot_actions,
                       left_thigh_actions, left_leg_actions, left_foot_actions]
            states, terminal, reward = environment.execute(actions=actions)
            reward = -1
            episode_reward += reward
            agent.observe(terminal=terminal, reward=reward)
        else:
            # no intervention: execute the agent's actions and learn from the
            # environment reward as usual
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)
            episode_reward += reward
    record.append(episode_reward)

reward_record[k][i] = record
temp = np.array(record)
reward_record_average[k][i] = moving_average(temp, average_over)

# evaluate
episode_reward = 0.0
eva_reward_record = []
print('evaluating agent with boundary position at %s and prohibitive parameter %s'
      % (prohibition_position[k], prohibition_parameter[i]))
for j in tqdm(range(evaluation_episode_number)):
    episode_reward = 0
    states = environment.reset()
    internals = agent.initial_internals()
    terminal = False
    while not terminal:
        # deterministic, independent acting: no exploration and no agent update
        actions, internals = agent.act(states=states, internals=internals,
                                       independent=True, deterministic=True)
        states, terminal, reward = environment.execute(actions=actions)
        episode_reward += reward
    eva_reward_record.append(episode_reward)
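# Illustrative sketch (assumes Tensorforce's Gym wrapper and the CartPole-v1
# level, which are not part of the original experiments): the evaluation loop
# above follows Tensorforce's standard pattern -- agent.act() is called with
# independent=True, so no agent.observe() call is needed and no update happens,
# and deterministic=True disables exploration noise during evaluation.
from tensorforce import Agent, Environment

environment = Environment.create(environment='gym', level='CartPole-v1',
                                 max_episode_timesteps=500)
agent = Agent.create(agent='ppo', environment=environment, batch_size=10)

states = environment.reset()
internals = agent.initial_internals()
terminal = False
episode_reward = 0.0
while not terminal:
    actions, internals = agent.act(states=states, internals=internals,
                                   independent=True, deterministic=True)
    states, terminal, reward = environment.execute(actions=actions)
    episode_reward += reward
print('evaluation episode reward:', episode_reward)

agent.close()
environment.close()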
import pickle
import numpy as np
import matplotlib.pyplot as plt

# evaluation_episode_number and average_over are assumed to come from the
# shared experiment configuration, as in the other plotting scripts.

# load training and evaluation data for the inverted pendulum experiment
ip_without = pickle.load(open("ip_without_record.p", "rb"))
ip_record = pickle.load(open("ip_record.p", "rb"))
ip_evaluation_record_without = pickle.load(
    open("ip_evaluation_without_record.p", "rb"))
ip_evaluation_record = pickle.load(open("ip_evaluation_record.p", "rb"))

# average evaluation reward over the evaluation episodes
evalu_without_ave = sum(
    ip_evaluation_record_without) / evaluation_episode_number
evalu_ave = sum(ip_evaluation_record) / evaluation_episode_number


def moving_average(x, w):
    # width-w running mean of the episode rewards
    return np.convolve(x, np.ones(w), 'valid') / w


ip_without_average = moving_average(ip_without, average_over)
ip_record_average = moving_average(ip_record, average_over)

fig = plt.figure(figsize=(13, 7))
env_standard = 800  # reference reward level
x = range(len(ip_record_average))
plt.plot(x, ip_without_average,
         label='Normal Training\nEvaluation %s' % evalu_without_ave,
         color='black', linestyle='-.')
plt.plot(x, ip_record_average,
         label='Coached by PID Controller\nEvaluation %s' % evalu_ave,
         color='magenta')
plt.xlabel('Episode Number', fontsize='large')