def get_match_scores(self, action, random_set):
        """Return pairwise distances between ``action`` and selected embeddings.

        Args:
            action: Query representation(s); passed unchanged as the first
                argument of ``pairwise_distances``.
            random_set: Input forwarded to ``self.embeddings(...)`` to obtain the
                embeddings to compare against (presumably indices of a sampled
                subset of actions -- TODO confirm against the caller).

        Returns:
            The raw result of ``pairwise_distances(action, embeddings)``.
            NOTE: despite the method name, the value is *not* negated here, so
            it is a distance rather than a similarity score.
        """
        embeddings = self.embeddings(random_set)
        if not self.config.true_embeddings:
            # Squash into (-1, 1). ``Tensor.tanh()`` is what ``F.tanh`` forwards
            # to, minus the deprecation warning that wrapper emits on older
            # PyTorch releases.
            embeddings = embeddings.tanh()

        # L2-norm based distance (per the original comment; the helper itself
        # is defined elsewhere).
        return pairwise_distances(action, embeddings)
    def get_match_scores(self, action):
        """Score ``action`` against ``self.embeddings`` by negated distance.

        Args:
            action: Query representation(s); passed unchanged as the first
                argument of ``pairwise_distances``.

        Returns:
            ``-pairwise_distances(action, embeddings)``: larger values mean a
            smaller distance. Presumably one score per (action row, embedding)
            pair -- TODO confirm the shape against ``pairwise_distances``.
        """
        embeddings = self.embeddings
        if not self.config.true_embeddings:  # TODO
            # Squash into (-1, 1). ``Tensor.tanh()`` is what ``F.tanh`` forwards
            # to, minus the deprecation warning that wrapper emits on older
            # PyTorch releases.
            embeddings = embeddings.tanh()

        # Similarity based on the L2 norm (a^2 + b^2 - 2ab, per the original
        # note); the Euclidean distance is negated so that closer means higher.
        similarity = -pairwise_distances(action, embeddings)

        # Alternative kept for reference -- dot-product similarity:
        # similarity = torch.mm(action, torch.transpose(embeddings, 0, 1))

        return similarity
    def get_match_scores(self, action):
        """Score ``action`` against ``self.embeddings`` by negated distance.

        Args:
            action: Query representation(s); passed unchanged as the first
                argument of ``pairwise_distances``.

        Returns:
            ``-pairwise_distances(action, embeddings)``: larger values mean a
            smaller distance.
        """
        # NOTE(review): the original annotation states that the result has the
        # same dimension as ``action`` -- not verifiable from here, TODO confirm.
        # NOTE(review): the original annotation also asks whether every action
        # owns its own ``self.embeddings`` parameter or whether all actions
        # share one; that question is left unresolved.
        embeddings = self.embeddings
        if not self.true_embeddings:
            # Squash into (-1, 1). ``Tensor.tanh()`` is what ``F.tanh`` forwards
            # to, minus the deprecation warning that wrapper emits on older
            # PyTorch releases.
            embeddings = embeddings.tanh()

        # Similarity based on the L2 norm: the Euclidean distance is negated so
        # that closer means higher.
        similarity = -pairwise_distances(action, embeddings)

        # Alternative kept for reference -- dot-product similarity:
        # similarity = torch.mm(action, torch.transpose(embeddings, 0, 1))

        return similarity
    def get_match_scores(self, action):
        """Score ``action`` against the embeddings, disabling inactive actions.

        Args:
            action: Query representation(s); passed unchanged as the first
                argument of ``pairwise_distances``.

        Returns:
            The negated ``pairwise_distances`` result, with every column whose
            ``self.action_mask`` entry equals ``False`` overwritten by ``-inf``.
            Per the original note the layout is (batch_size x number of
            actions) -- TODO confirm against ``pairwise_distances``.
        """
        raw = self.embeddings
        # Use the embeddings as-is when configured to, otherwise squash them
        # into (-1, 1) first.
        candidates = raw if self.config.true_embeddings else torch.tanh(raw)

        # L2-norm based score: negate the distance so that closer means higher.
        # (Dot-product alternative:
        #  torch.mm(action, torch.transpose(embeddings, 0, 1)))
        scores = -pairwise_distances(action, candidates)

        # Never choose actions outside the active set: -inf makes their
        # probability evaluate to 0 (e^-inf) in a later softmax.
        inactive = self.action_mask == False  # elementwise comparison, not identity
        scores[:, inactive] = float("-inf")

        return scores