else: next_obs = None total_reward += reward.sum() adversaries_reward += reward[0:2].sum() agent_reward = reward[3] rr += reward.cpu().numpy() maddpg.memory.push(obs.data, action, next_obs, reward) #for i in range(len(next_obs)): # for j in range(4): # for k in range(len(next_obs[i][j])): # if next_obs[i] != None: # print('next_obs[i][j][k]',type(next_obs[i][j][k]),i,j,k) #print('next_obs',len(next_obs)) 4 ndarray next_obs[0] <class 'torch.FloatTensor'> len(next_obs[0]) 16 obs = next_obs c_loss, a_loss = maddpg.update_policy(i_episode) env.render() maddpg.episode_done += 1 endTime = datetime.datetime.now() runTime = (endTime - startTime).seconds totalTime = totalTime+runTime print('Episode:%d,reward = %f' % (i_episode, total_reward)) print('Episode:%d,adversaries_reward = %f' % (i_episode, adversaries_reward)) print('Episode:%d,agent_reward = %f' % (i_episode, agent_reward)) print('this episode run time:'+ str(runTime)) print('totalTime:'+ str(totalTime)) reward_record.append(total_reward) adversaries_reward_record.append(adversaries_reward) agent_reward_record.append(agent_reward) if maddpg.episode_done == maddpg.episodes_before_train:
else: next_obs = None total_reward += reward.sum() #adversaries_reward += reward[0:5].sum() if initial_train is False: total_reward_5 += reward[4] else: total_reward_5 += 0.0 #agent_reward += reward[5:9].sum() rr += reward.cpu().numpy() maddpg.memory.push(obs.data, action, next_obs, reward, agent_max_id) obs = next_obs c_loss, a_loss = maddpg.update_policy(i_episode, initial_train) #frame.append(env.render()) #env.render() #if i_episode == 1: # a = np.array(frame) # b = np.reshape(a, (600, 700, 700, 3)) # imageio.mimsave('test_adv.gif', b, 'GIF') if i_episode % 100 == 0 and i_episode > 0 and test_initial is False and initial_train is True: for i in range(maddpg.n_agents): th.save( maddpg.critics[i], 'new/model_new/critic[' + str(i) + '].pkl_episode' + str(i_episode)) th.save( maddpg.actors[i], 'new/model_new/actors[' + str(i) + '].pkl_episode' + str(i_episode)) if i_episode % 100 == 0 and i_episode > 0 and test_initial is True and initial_train is False: