Example #1
        else:
            next_obs = None

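        # reward is a tensor with one entry per agent for this step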
        total_reward += reward.sum()
        adversaries_reward += reward[0:2].sum()  # only the first two entries are counted as adversaries here
        agent_reward = reward[3]  # not accumulated: keeps only the current step's reward for agent 3
        rr += reward.cpu().numpy()
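        # store the transition (obs, action, next_obs, reward) in the shared replay buffer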
        maddpg.memory.push(obs.data, action, next_obs, reward)
        # debug note: next_obs is a list of 4 observations; each is a torch.FloatTensor of length 16
        obs = next_obs
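        # one training update for each agent's critic and actor; returns the critic and actor losses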
        c_loss, a_loss = maddpg.update_policy(i_episode)
        env.render()
    maddpg.episode_done += 1
    endTime = datetime.datetime.now()
    runTime = (endTime - startTime).seconds
    totalTime = totalTime+runTime
    print('Episode:%d,reward = %f' % (i_episode, total_reward))
    print('Episode:%d,adversaries_reward = %f' % (i_episode, adversaries_reward))
    print('Episode:%d,agent_reward = %f' % (i_episode, agent_reward))
    print('this episode run time:'+ str(runTime))
    print('totalTime:'+ str(totalTime))
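    # keep per-episode reward histories for later inspection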
    reward_record.append(total_reward)
    adversaries_reward_record.append(adversaries_reward)
    agent_reward_record.append(agent_reward)

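    # check whether the warm-up phase (episodes_before_train) has just finished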
    if maddpg.episode_done == maddpg.episodes_before_train:
Example #2
        else:
            next_obs = None

        total_reward += reward.sum()
        #adversaries_reward += reward[0:5].sum()
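        # when not in the initial training run, track agent 4's reward separately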
        if initial_train is False:
            total_reward_5 += reward[4]
        #agent_reward += reward[5:9].sum()
        rr += reward.cpu().numpy()

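        # this variant also stores agent_max_id alongside each transition in the replay buffer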
        maddpg.memory.push(obs.data, action, next_obs, reward, agent_max_id)

        obs = next_obs
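        # policy update; the initial_train flag is forwarded to the trainer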
        c_loss, a_loss = maddpg.update_policy(i_episode, initial_train)
        #frame.append(env.render())
        #env.render()
    #if i_episode == 1:
    #    a = np.array(frame)
    #    b = np.reshape(a, (600, 700, 700, 3))
    #    imageio.mimsave('test_adv.gif', b, 'GIF')
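    # every 100 episodes, checkpoint all critic/actor networks for the initial-training configuration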
    if i_episode % 100 == 0 and i_episode > 0 and test_initial is False and initial_train is True:
        for i in range(maddpg.n_agents):
            th.save(
                maddpg.critics[i], 'new/model_new/critic[' + str(i) +
                '].pkl_episode' + str(i_episode))
            th.save(
                maddpg.actors[i], 'new/model_new/actors[' + str(i) +
                '].pkl_episode' + str(i_episode))
    if i_episode % 100 == 0 and i_episode > 0 and test_initial is True and initial_train is False: