    def communication_channel(self, env, agent_a, agent_b, target, perception):
        # add perceptual noise

        noise = th.float_var(
            Normal(
                torch.zeros(self.batch_size, self.perception_dim),
                torch.ones(self.batch_size, self.perception_dim) *
                self.perception_noise).sample())

        perception = perception + noise
        # generate message
        msg_logits = agent_a(perception=perception)
        # msg_probs = F.gumbel_softmax(msg_logits, tau=2 / 3)
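        # add Gaussian noise to the message logits before the softmax, so the
        # communication channel itself is noisy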
        noise = th.float_var(
            Normal(torch.zeros(self.batch_size, self.msg_dim),
                   torch.ones(self.batch_size, self.msg_dim) *
                   self.com_noise).sample())

        msg_probs = F.softmax(msg_logits + noise, dim=1)
        #msg_probs = F.gumbel_softmax(msg_logits + noise, tau=10 / 3, dim=1)
        msg_dist = Categorical(msg_probs)
        msg = msg_dist.sample()
        # interpret message and sample a guess
        guess_logits = agent_b(msg=msg)
        guess_probs = F.softmax(guess_logits, dim=1)
        #guess_probs = F.gumbel_softmax(msg_logits, tau=10 / 3, dim=1)
        m = Categorical(guess_probs)
        guess = m.sample()

        #compute reward
        if self.reward_func == 'regier_reward':
            CIELAB_guess = env.chip_index2CIELAB(guess.data)
            reward = env.regier_reward(perception,
                                       CIELAB_guess,
                                       bw_boost=self.bw_boost)
        elif self.reward_func == 'abs_dist':
            diff = torch.abs(target.unsqueeze(dim=1) - guess.unsqueeze(dim=1))
            reward = 1 - (diff.float() / 100)  #1-(diff.float()/50)

        # compute loss and update model
        if self.loss_type == 'REINFORCE':
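            # REINFORCE: weight the negative log-probability of each sampled
            # action (message / guess) by the reward it obtained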
            sender_loss = -(reward *
                            msg_dist.log_prob(msg)).sum() / self.batch_size
            receiver_loss = -(reward *
                              m.log_prob(guess)).sum() / self.batch_size
            #receiver_loss = self.criterion_receiver(guess_logits, target.squeeze())

            loss = receiver_loss + sender_loss

        elif self.loss_type == 'CrossEntropyLoss':
            loss = self.criterion_receiver(guess_logits, target.squeeze())
        return loss
    def play(self, env, agent_a, agent_b):
        agent_a = th.cuda(agent_a)
        agent_b = th.cuda(agent_b)

        optimizer = optim.Adam(
            list(agent_a.parameters()) + list(agent_b.parameters()))

        for i in range(self.max_epochs):
            optimizer.zero_grad()

            color_codes, colors = env.mini_batch(batch_size=self.batch_size)
            color_codes = th.long_var(color_codes)
            colors = th.float_var(colors)

            loss = self.communication_channel(env, agent_a, agent_b,
                                              color_codes, colors)

            loss.backward()
            optimizer.step()

            # printing status
            if self.print_interval != 0 and ((i + 1) % self.print_interval
                                             == 0):
                self.print_status(loss)

            if self.evaluate_interval != 0 and ((i + 1) %
                                                self.evaluate_interval == 0):
                self.evaluate(env, agent_a)

        return agent_a.cpu()
    def communication_channel(self, env, agent_a, agent_b, target, perception):
        # add perceptual noise
        if self.training_mode:
            noise = th.float_var(
                Normal(
                    torch.zeros(self.batch_size, self.perception_dim),
                    torch.ones(self.batch_size, self.perception_dim) *
                    self.perception_noise).sample())
            perception = perception + noise
        # Sample message
        probs = agent_a(perception=perception)
        m = Categorical(probs)
        msg = m.sample()
        # interpret message
        guess = agent_b(msg=msg)
        # compute reward
        if self.reward_func == 'basic_reward':
            reward = env.basic_reward(target, guess)
        elif self.reward_func == 'regier_reward':
            reward = env.regier_reward(perception, guess)
        elif self.reward_func == 'number_reward':
            reward = env.number_reward(target, guess)
        elif self.reward_func == 'inverted_reward':
            reward = env.inverted_number(target, guess)
        self.sum_reward += reward.sum()
        # compute loss
        self.loss_sender = self.sender_loss_multiplier * (
            (-m.log_prob(msg) * reward).sum() / self.batch_size)
        self.loss_receiver = self.criterion_receiver(guess, target)
        return self.loss_receiver + self.loss_sender
    def play(self, env, agent_a, agent_b):
        agent_a = th.cuda(agent_a)
        agent_b = th.cuda(agent_b)
        receiver_opt = optim.Adam(list(agent_b.parameters()))
        optimizer = optim.Adam(
            list(agent_a.parameters()) + list(agent_b.parameters()))

        for i in range(self.max_epochs):
            for j in range(50):
                color_codes, colors = env.mini_batch(
                    batch_size=self.batch_size)
                color_codes = th.long_var(color_codes)
                colors = th.float_var(colors)
                receiver_loss, _, _ = self.communication_channel(
                    env, agent_a, agent_b, color_codes, colors)
                receiver_loss.backward()
                receiver_opt.step()
                receiver_opt.zero_grad()
            self.board_reward = 0
            optimizer.zero_grad()

            color_codes, colors = env.mini_batch(batch_size=self.batch_size)
            color_codes = th.long_var(color_codes)
            colors = th.float_var(colors)

            receiver_loss, sender_loss, entropy_loss = self.communication_channel(
                env, agent_a, agent_b, color_codes, colors)
            loss = receiver_loss + sender_loss + entropy_loss
            loss.backward()
            optimizer.step()

            # Update tensorboard
            #print(self.tensorboard)
            if self.print_interval != 0 and (i + 1) % self.print_interval == 0:
                self.tensorboard_update(i, env, agent_a, agent_b)
            # printing status
            if self.print_interval != 0 and ((i + 1) % self.print_interval
                                             == 0):
                if self.loss_type == 'REINFORCE':
                    #self.print_status(-loss)
                    self.print_status(loss)
                #else:
                #    self.print_status(loss)

            if self.evaluate_interval != 0 and ((i + 1) %
                                                self.evaluate_interval == 0):
                self.evaluate(env, agent_a)
    def word2number(self, agent):
        # Penalize the receiver when several distinct message words map to the
        # same number: -2 per duplicated guess value.
        msg = th.float_var(np.eye(agent.msg_dim))
        guess_logits = agent(msg=msg)
        guess_probs = F.softmax(guess_logits, dim=1)
        _, guess = guess_probs.max(1)
        guess = guess.data.numpy()
        duplicates = [
            item for item, count in Counter(guess).items() if count > 1
        ]
        return -len(duplicates) * 2
    def play(self, env, agent_a, agent_b):
        agent_a = th.cuda(agent_a)
        agent_b = th.cuda(agent_b)

        optimizer = optim.Adam(list(agent_a.parameters()) +
                               list(agent_b.parameters()),
                               lr=0.0001)

        for i in range(self.max_epochs):
            optimizer.zero_grad()
            # Agent a sends a message
            color_codes, colors = env.mini_batch(batch_size=self.batch_size)
            color_codes = th.long_var(color_codes)
            colors = th.float_var(colors)
            loss1 = self.communication_channel(env, agent_a, agent_b,
                                               color_codes, colors)
            loss1.backward()
            # Agent b sends a message
            color_codes, colors = env.mini_batch(batch_size=self.batch_size)
            color_codes = th.long_var(color_codes)
            colors = th.float_var(colors)
            loss2 = self.communication_channel(env, agent_b, agent_a,
                                               color_codes, colors)
            loss2.backward()
            # Backpropagate
            #loss.backward()
            optimizer.step()
            loss = loss1 + loss2
            # printing status
            if self.print_interval != 0 and ((i + 1) % self.print_interval
                                             == 0):
                #self.tensorboard_update(i, env, agent_a)
                self.print_status(loss)

            if self.evaluate_interval != 0 and ((i + 1) %
                                                self.evaluate_interval == 0):
                self.evaluate(env, agent_a)

        return agent_a.cpu()
def agent_language_map(env, a):
    V = {}
    a = th.cuda(a)
    perception_indices, perceptions = env.full_batch()
    if isinstance(perceptions, np.ndarray):
        perceptions = th.float_var(
            torch.tensor(perceptions, dtype=torch.float32))
    probs = a(perception=perceptions)
    _, terms = probs.max(1)

    for perception_index in perception_indices:
        V[perception_index] = terms[perception_index].item()

    return list(V.values())
    def compute_gibson_cost(self, env, a):
        _, perceptions = env.full_batch()
        if isinstance(perceptions, np.ndarray):
            perceptions = th.float_var(
                torch.tensor(perceptions, dtype=torch.float32))
        perceptions = perceptions.cpu()
        all_terms = th.long_var(range(a.msg_dim), False)
        p_WC = F.softmax(a(perception=perceptions), dim=1).t().data.numpy()

        p_CW = F.softmax(a(msg=all_terms), dim=1).data.numpy()

        S = -np.diag(np.matmul(p_WC.transpose(), (np.log2(p_CW))))
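        # S[c] = -sum_w p(w|c) * log2 p(c|w): the expected surprisal of
        # recovering chip c from the word used for it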
        avg_S = S.sum() / len(S)  # expectation assuming uniform prior
        # debug code
        # s = 0
        # c = 43
        # for w in range(a.msg_dim):
        #     s += -p_WC[w, c]*np.log2(p_CW[w, c])
        # print(S[c] - s)
        return S, avg_S
    def play(self, env, agent_a, agent_b):
        agent_a = th.cuda(agent_a)
        agent_b = th.cuda(agent_b)
        optimizer = optim.Adam(list(agent_a.parameters()) +
                               list(agent_b.parameters()),
                               lr=0.0001)

        for i in range(self.max_epochs):
            optimizer.zero_grad()

            color_codes, colors = env.mini_batch(batch_size=self.batch_size)
            color_codes = th.long_var(color_codes)
            colors = th.float_var(colors)

            loss = self.communication_channel(env, agent_a, agent_b,
                                              color_codes, colors)
            loss.backward()
            optimizer.step()

            # Update tensorboard
            #print(self.tensorboard)
            # if((i+1) % self.print_interval == 0):
            #     self.tensorboard_update(i, env, agent_a, agent_b)
            # printing status
            if self.print_interval != 0 and ((i + 1) % self.print_interval
                                             == 0):
                if self.loss_type == 'REINFORCE':
                    #self.print_status(-loss)
                    self.print_status(loss)
                else:
                    self.print_status(loss)

            if self.evaluate_interval != 0 and ((i + 1) %
                                                self.evaluate_interval == 0):
                self.evaluate(env, agent_a)

        #agent_a.reward_log = self.reward_log
        #agent_b.reward_log = self.reward_log

        return agent_a.cpu()
    def __init__(self, wcs_path='data/') -> None:
        super().__init__()

        baseurl = 'http://www1.icsi.berkeley.edu/wcs/data/'
        self.get_data(baseurl + 'cnum-maps/cnum-vhcm-lab-new.txt',
                      wcs_path + 'cnum-vhcm-lab-new.txt')
        self.get_data(baseurl + '20021219/txt/term.txt', wcs_path + 'term.txt')
        self.get_data(baseurl + '20041016/txt/dict.txt', wcs_path + 'dict.txt')

        # http://www1.icsi.berkeley.edu/wcs/data/cnum-maps/cnum-vhcm-lab-new.txt
        # http://www1.icsi.berkeley.edu/wcs/data/20021219/txt/term.txt
        # http://www1.icsi.berkeley.edu/wcs/data/20041016/txt/dict.txt

        self.color_chips = pd.read_csv(wcs_path + 'cnum-vhcm-lab-new.txt',
                                       sep='\t')
        self.cielab_map = th.float_var(self.color_chips[['L*', 'a*',
                                                         'b*']].values)

        self.term = pd.read_csv(
            wcs_path + 'term.txt',
            sep='\t',
            names=['lang_num', 'spkr_num', 'chip_num', 'term_abrev'])
        self.dict = pd.read_csv(
            wcs_path + 'dict.txt',
            sep='\t',
            skiprows=[0],
            names=['lang_num', 'term_num', 'term', 'term_abrev'])
        self.term_nums = pd.merge(
            self.term,
            self.dict.drop_duplicates(subset=['lang_num', 'term_abrev']),
            how='inner',
            on=['lang_num', 'term_abrev'])

        self.human_mode_maps = self.compute_human_mode_maps(wcs_path)

        self.plot_with_colors(V=None,
                              save_to_path=wcs_path +
                              'mode_maps/empty_map.png')
    def communication_channel(self, env, agent_a, agent_b, target, perception):
        # add perceptual noise

        noise = th.float_var(
            Normal(
                torch.zeros(self.batch_size, self.perception_dim),
                torch.ones(self.batch_size, self.perception_dim) *
                self.perception_noise).sample())
        perception = perception + noise
        # generate message
        msg_logits = agent_a(perception=perception)
        # msg_probs = F.gumbel_softmax(msg_logits, tau=2 / 3)
        noise = th.float_var(
            Normal(torch.zeros(self.batch_size, self.msg_dim),
                   torch.ones(self.batch_size, self.msg_dim) *
                   self.com_noise).sample())
        msg_probs = F.softmax(msg_logits + noise, dim=1)
        #msg_probs = F.gumbel_softmax(msg_logits + noise, tau=10 / 3, dim=1)
        msg_dist = Categorical(msg_probs)
        msg = msg_dist.sample()
        # interpret message and sample a guess
        guess_logits = agent_b(msg=msg)
        guess_probs = F.softmax(guess_logits, dim=1)
        #guess_probs = F.gumbel_softmax(msg_logits, tau=10 / 3, dim=1)
        m = Categorical(guess_probs)
        guess = m.sample()

        # Reconstruct (sanity check)
        recon_logits = agent_a(msg=msg)
        recon_probs = F.softmax(recon_logits, dim=1)
        recon_dist = Categorical(recon_probs)
        recon_guess = recon_dist.sample()
        # CrossEntropy or REINFORCE?
        # This becomes a standard autoencoder ?
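        # the sender reconstructs the target from its own message; the
        # reconstruction is scored with the same REINFORCE-style reward as the
        # receiver's guess and weighted by 0.5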
        recon_diff = torch.abs(target - recon_guess.unsqueeze(dim=1))
        recon_reward = 1 - (recon_diff.float() / 100)
        recon_loss = 0.5 * (-recon_dist.log_prob(recon_guess) *
                            recon_reward).sum() / self.batch_size
        #recon_loss =self.recon_param * self.criterion_receiver(recon_logits, target.squeeze())
        #compute reward
        if self.reward_func == 'regier_reward':
            CIELAB_guess = env.chip_index2CIELAB(guess.data)
            reward = env.regier_reward(perception,
                                       CIELAB_guess,
                                       bw_boost=self.bw_boost)
        elif self.reward_func == 'abs_dist':
            diff = torch.abs(target - guess.unsqueeze(dim=1))
            reward = 1 - (diff.float() / 100)  #1-(diff.float()/50)
            #reward = 1 /(diff.float()+1)**2
        elif self.reward_func == 'exp_reward':
            diff = torch.abs(target - guess.unsqueeze(dim=1))
            reward = 2**(-diff.float())  #1-(diff.float()/50)
            #reward = 1 /(diff.float()+1)**2
        elif self.reward_func == 'number_reward':
            reward = env.number_reward(target, guess)
        elif self.reward_func == 'inverted_reward':
            reward = env.inverted_number(target, guess)
        elif self.reward_func == 'interval_reward':
            reward = env.interval_reward(target, guess)
        elif self.reward_func == 'target_reward':
            reward = env.target_reward(target, guess)
        elif self.reward_func == 'sim_index':
            reward = env.sim_index(target, guess)
        self.sum_reward += reward.sum()
        self.board_reward = reward
        # compute loss and update model
        if self.loss_type == 'REINFORCE':
            #receiver_loss =  self.criterion_receiver(guess_logits, target.squeeze())
            sender_loss = (-msg_dist.log_prob(msg) *
                           reward).sum() / self.batch_size
            receiver_loss = (-m.log_prob(guess) *
                             reward).sum() / self.batch_size
            # For tensorboard logging
            #entropy_loss =  -(self.entropy_coef * (msg_dist.entropy().mean() + m.entropy().mean()))
            # self.sender_loss += sender_loss
            # self.receiver_loss += receiver_loss
            #self.entropy_coef = 0.999 * self.entropy_coef
            loss = receiver_loss + sender_loss + recon_loss
            # loss = receiver_loss + sender_loss + entropy_loss
            #loss = receiver_loss
        elif self.loss_type == 'CrossEntropyLoss':
            loss = self.criterion_receiver(guess_logits, target.squeeze())
            # For tensorboard logging
        return loss
    def communication_channel(self, env, agent_a, agent_b, target, perception):
        # add perceptual noise
        noise = th.float_var(
            Normal(
                torch.zeros(self.batch_size, self.perception_dim),
                torch.ones(self.batch_size, self.perception_dim) *
                self.perception_noise).sample())
        perception = perception + noise
        # generate message
        msg_logits = agent_a(perception=perception)
        # msg_probs = F.gumbel_softmax(msg_logits, tau=2 / 3)
        noise = th.float_var(
            Normal(torch.zeros(self.batch_size, self.msg_dim),
                   torch.ones(self.batch_size, self.msg_dim) *
                   self.com_noise).sample())
        msg_probs = F.softmax(msg_logits + noise, dim=1)
        #msg_probs = F.gumbel_softmax(msg_logits + noise, tau=10 / 3, dim=1)
        msg_dist = Categorical(msg_probs)
        msg = msg_dist.sample()
        # interpret message and sample a guess
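        # note: in this variant the receiver gets the full message distribution
        # (msg_probs) rather than the sampled message index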
        guess_logits = agent_b(msg=msg_probs)
        noise = th.float_var(
            Normal(torch.zeros(self.batch_size, self.msg_dim),
                   torch.ones(self.batch_size, self.msg_dim) *
                   self.com_noise).sample())
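        # (this second noise sample is not used below)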
        guess_probs = F.softmax(guess_logits, dim=1)
        #guess_probs = F.gumbel_softmax(msg_logits, tau=10 / 3, dim=1)
        m = Categorical(guess_probs)
        guess = m.sample()

        #compute reward
        if self.reward_func == 'regier_reward':
            CIELAB_guess = env.chip_index2CIELAB(guess.data)
            reward = env.regier_reward(perception,
                                       CIELAB_guess,
                                       bw_boost=self.bw_boost)
        elif self.reward_func == 'abs_dist':
            diff = torch.abs(target - (1 + guess.unsqueeze(dim=1)))
            reward = 1 - (diff.float() / 100)  #1-(diff.float()/50)
        elif self.reward_func == 'abs_penalty':
            diff = torch.abs(target - (1 + guess.unsqueeze(dim=1)))
            reward = 1 - (diff.float() / 100)
            # Penalize the receiver if it assigns more than one word to the same number
            reward = reward + env.word2number(agent_b)
        elif self.reward_func == 'exp_reward':
            diff = torch.abs(target - guess.unsqueeze(dim=1))
            reward = 2**(-0.1 * diff.float())
        elif self.reward_func == 'sim_index':
            reward = env.sim_index(target, guess)
        self.sum_reward += reward.sum()
        self.board_reward = reward
        # compute loss and update model
        if self.loss_type == 'REINFORCE':
            # compute baseline
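            # incremental running mean of the reward, used as a
            # variance-reduction baseline for the REINFORCE gradient estimate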
            self.n_points += 1
            self.baseline += (reward.mean() - self.baseline) / self.n_points
            # receiver_loss =  self.criterion_receiver(guess_logits, target.squeeze())
            sender_loss = (-msg_dist.log_prob(msg) *
                           (reward - self.baseline)).sum() / self.batch_size
            receiver_loss = (-m.log_prob(guess) *
                             (reward - self.baseline)).sum() / self.batch_size
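            # entropy bonus (sender weighted 1x, receiver 3x) to keep the
            # policies from collapsing prematurely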
            entropy_loss = -(
                self.entropy_coef *
                (1 * msg_dist.entropy().mean() + 3 * m.entropy().mean()))
            # For tensorboard logging
            self.sender_loss += sender_loss
            self.receiver_loss += receiver_loss
            #self.entropy_coef = 0.999 * self.entropy_coef
            # loss = receiver_loss + sender_loss
            #loss = receiver_loss
            return receiver_loss, sender_loss, entropy_loss
        elif self.loss_type == 'CrossEntropyLoss':
            loss = self.criterion_receiver(guess_logits, target.squeeze())
            # For tensorboard logging
            self.receiver_loss += loss
            # return the same triple as the REINFORCE branch so callers can unpack it
            return loss, 0, 0
    def tensorboard_update(self, epoch, env, a_agent, b_agent):
        # Log scalars
        writer.add_scalar(
            'Loss/sender_loss',
            self.sender_loss / (self.print_interval * self.batch_size), epoch)
        writer.add_scalar(
            'Loss/receiver_loss',
            self.receiver_loss / (self.print_interval * self.batch_size),
            epoch)
        writer.add_scalar(
            'Metrics/Reward_' + str(self.reward_func),
            self.board_reward.sum() / (self.print_interval * self.batch_size),
            epoch)
        # log evaluation metrics
        V = evaluate.agent_language_map(env, a_agent)
        # term usage
        terms_used = evaluate.compute_term_usage(V=V)[-1]
        writer.add_scalar('Metrics/term_usage', terms_used, epoch)
        # Agent-stats
        # perception_layer = a_agent.perception_embedding.weight
        # msg_layer = a_agent.msg_creator.weight
        # writer.add_histogram('Sender/perception_layer', perception_layer, epoch)
        # writer.add_histogram('Sender/msg_layer', msg_layer, epoch)
        # writer.add_scalar('Sender/perception_layer_grad', torch.abs(perception_layer.grad).sum(), epoch)
        # writer.add_scalar('Sender/msg_layer_grad', torch.abs(msg_layer.grad).sum(), epoch)
        #
        # receiver_layer = b_agent.msg_receiver.weight
        # guess_layer = b_agent.color_estimator.weight
        # writer.add_histogram('Receiver/receiver_layer', receiver_layer, epoch)
        # writer.add_histogram('Receiver/guess_layer', guess_layer, epoch)
        # writer.add_scalar('Receiver/receiver_layer_grad', torch.abs(receiver_layer.grad).sum(), epoch)
        # writer.add_scalar('Receiver/guess_layer_grad', torch.abs(guess_layer.grad).sum(), epoch)

        # add batch
        #writer.add_text('Batch', str(self.batch), epoch)

        # Produce partition
        # if number environment:
        partition = self.compute_ranges(V)
        writer.add_text('Partition', str(partition), epoch)
        writer.flush()
        self.sender_loss = 0
        self.receiver_loss = 0
        # Guesses
        msg = th.float_var(np.eye(a_agent.msg_dim))
        guess_logits = b_agent(msg=msg)
        guess_probs = F.softmax(guess_logits, dim=1)
        _, guess = guess_probs.max(1)
        writer.add_text('Receiver guesses', str(guess + 1), epoch)

        index, perception = env.full_batch()
        prob = F.softmax(a_agent(th.float_var(perception)), dim=1)
        prob = prob.detach().numpy()
        guess_probs = guess_probs.detach().numpy()
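        # one figure per stimulus: the sender's distribution over message words;
        # the second loop below does the same for the receiver's guesses per word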
        for i in range(perception.shape[0]):
            fig, ax = plt.subplots(figsize=(5, 5))
            plt.plot(range(a_agent.msg_dim), prob[i, :])
            writer.add_figure('prob_words' + str(i + 1) + '/sender', fig,
                              epoch)

        for i in range(guess_probs.shape[0]):
            fig, ax = plt.subplots(figsize=(5, 5))
            plt.plot(range(guess_probs.shape[1]), guess_probs[i, :])
            writer.add_figure('prob_guess' + str(i + 1) + '/receiver', fig,
                              epoch)