class VariationalJointEmpowerment(VariationalBaseEmpowerment):
    """Joint variational empowerment estimator.

    Holds three recurrent MLPs -- transition, source, and planning -- and
    delegates reward computation to ``ComputerJoint`` and parameter updates
    to ``TrainerJoint``.
    """

    def __init__(self, init_params, lr=0.01):
        """
        Args:
            init_params (dict): input/output sizes for the three networks.
                Keys: 'num_in_trans'/'num_out_trans', 'num_in_src'/
                'num_out_src', 'num_in_plan'/'num_out_plan'
                (see :meth:`init_from_env`).
            lr (float): learning rate, stored for use by the trainer.
        """
        super(VariationalJointEmpowerment, self).__init__()
        self.transition = MLPNetwork(init_params['num_in_trans'],
                                     init_params['num_out_trans'],
                                     recurrent=True)
        self.source = MLPNetwork(init_params['num_in_src'],
                                 init_params['num_out_src'],
                                 recurrent=True)
        self.planning = MLPNetwork(init_params['num_in_plan'],
                                   init_params['num_out_plan'],
                                   recurrent=True)
        self.lr = lr

        self.device = Device('cpu')

        self.computer = ComputerJoint(self)
        self.trainer = TrainerJoint(self)

    def compute(self, rewards, next_obs):
        """Delegate empowerment computation to the joint computer."""
        return self.computer.compute(rewards, next_obs)

    def update(self, sample, logger=None):
        """Delegate one training step on ``sample`` to the joint trainer."""
        return self.trainer.update(sample, logger)

    def _move_networks(self, device):
        """Move all three networks to ``device`` ('gpu' or 'cpu') if not already there."""
        if self.device.get_device() != device:
            fn = (lambda x: x.cuda()) if device == 'gpu' else (lambda x: x.cpu())
            self.transition = fn(self.transition)
            self.source = fn(self.source)
            self.planning = fn(self.planning)
        self.device.set_device(device)

    def prep_training(self, device='gpu'):
        """Put all networks in train mode and move them to ``device``."""
        self.transition.train()
        self.source.train()
        self.planning.train()
        self._move_networks(device)

    def prep_rollouts(self, device='cpu'):
        """Put all networks in eval mode and move them to ``device``."""
        self.transition.eval()
        self.source.eval()
        self.planning.eval()
        self._move_networks(device)

    @classmethod
    def init_from_env(cls, env):
        """Build an instance with network sizes derived from a multi-agent env.

        The transition sizes accumulate over all agents (the joint transition
        net sees every agent's obs and action), while the source/planning
        sizes are overwritten each iteration and end up with the LAST agent's
        dims -- presumably the env is homogeneous so all agents share the same
        spaces; TODO confirm with callers.
        """
        num_in_source = num_out_source = num_in_planning = \
            num_out_planning = num_in_transition = num_out_transition = 0
        for acsp, obsp in zip(env.action_space, env.observation_space):

            num_in_source = obsp.shape[0]
            num_out_source = acsp.n

            # planning net conditions on (obs, next_obs) pairs, hence 2x
            num_in_planning = 2 * obsp.shape[0]
            num_out_planning = acsp.n

            num_in_transition += obsp.shape[0] + acsp.n
            num_out_transition += obsp.shape[0]

        init_params = {
            'num_in_src': num_in_source,
            'num_in_plan': num_in_planning,
            'num_in_trans': num_in_transition,
            'num_out_src': num_out_source,
            'num_out_plan': num_out_planning,
            'num_out_trans': num_out_transition
        }

        instance = cls(init_params)
        instance.init_dict = init_params
        return instance
class SocialInfluence(BaseEmpowerment):
    """Social-influence intrinsic reward for a set of agents.

    One shared recurrent transition MLP plus one recurrent planning MLP per
    agent. Reward computation is delegated to ``Computer`` and training to
    ``Trainer``.
    """

    def __init__(self, agents, init_params, num_in_trans, num_out_trans, lr=0.01, hidden_dim=64, recurrent=False,
                 convolutional=False):
        """
        Args:
            agents: agent policies this module evaluates.
            init_params (list[dict]): per-agent planning-net sizes, keys
                'num_in_plan' and 'num_out_plan'.
            num_in_trans (int): input size of the shared transition net.
            num_out_trans (int): output size of the shared transition net.
            lr (float): learning rate, stored for use by the trainer.
            hidden_dim, recurrent, convolutional: accepted for interface
                compatibility (see :meth:`init`) but currently unused -- the
                networks are always built with ``recurrent=True`` and the
                default hidden size.
        """
        super(SocialInfluence, self).__init__()
        self.agents = agents
        self.device = Device('cpu')
        self.transition = MLPNetwork(num_in_trans, num_out_trans, recurrent=True)
        self.planning = [MLPNetwork(p['num_in_plan'], p['num_out_plan'], recurrent=True) for p in init_params]

        self.lr = lr

        self.niter = 0

        self.computer = Computer(self)
        self.trainer = Trainer(self)

    def compute(self, rewards, next_obs):
        """Compute per-agent social-influence rewards.

        Converts each agent's column of ``next_obs`` to a torch Variable,
        averages the computed influence over the last dimension, and returns
        it as a (1, n_agents) numpy array.
        """
        next_obs = [Variable(torch.Tensor(np.vstack(next_obs[:, i])),
                             requires_grad=False) for i in range(next_obs.shape[1])]
        si = self.computer.compute(next_obs)
        i_rews = si.mean(-1)
        return i_rews.detach().numpy().reshape(1, -1)

    def update(self, sample, logger=None):
        """Delegate one training step on ``sample`` to the trainer."""
        return self.trainer.update(sample, logger)

    def _move_networks(self, device):
        """Move the transition net and every planning net to ``device`` if needed."""
        if self.device.get_device() != device:
            fn = (lambda x: x.cuda()) if device == 'gpu' else (lambda x: x.cpu())
            self.transition = fn(self.transition)
            # Rebuild the list: assigning `p = fn(p)` inside a loop would only
            # rebind the loop variable and leave self.planning unchanged.
            self.planning = [fn(p) for p in self.planning]
        self.device.set_device(device)

    def prep_training(self, device='gpu'):
        """Put all networks in train mode and move them to ``device``."""
        self.transition.train()
        for planning in self.planning:
            planning.train()
        self._move_networks(device)

    def prep_rollouts(self, device='cpu'):
        """Put all networks in eval mode and move them to ``device``."""
        self.transition.eval()
        for planning in self.planning:
            planning.eval()
        self._move_networks(device)

    @classmethod
    def init(cls, agents, env, lr=0.01, hidden_dim=64, recurrent=False, convolutional=False):
        """
        Instantiate instance of this class from multi-agent environment
        """
        init_params = []

        num_in_transition = num_out_transition = 0
        for i, (acsp, obsp) in enumerate(zip(env.action_space, env.observation_space)):

            # shared transition net sees every agent's obs and action
            num_in_transition += obsp.shape[0] + acsp.n
            num_out_transition += obsp.shape[0]

            # agent i's planning net sees its (obs, next_obs) pair plus the
            # actions of all OTHER agents
            num_in_planning = 2 * obsp.shape[0]
            for j, acsp_j in enumerate(env.action_space):
                if j != i: num_in_planning += acsp_j.n
            num_out_planning = acsp.n
            init_params.append({'num_in_plan': num_in_planning,
                                'num_out_plan': num_out_planning})

        init_dict = {'agents': agents,
                     'lr': lr,
                     'hidden_dim': hidden_dim,
                     'init_params': init_params,
                     'num_in_trans': num_in_transition,
                     'num_out_trans': num_out_transition,
                     'recurrent': recurrent,
                     'convolutional': convolutional}
        instance = cls(**init_dict)
        instance.init_dict = init_dict
        return instance