Example #1
    def compute_single_likelihood(self, udi):
        """
                Compute the likelihood of a single data point, udi, an utteranceData
        """
        assert isinstance(udi, UtteranceData)
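        # NOTE: `log` and `ifelse` are assumed to be imported at module level,
        # e.g. `from math import log` and `from LOTlib.Miscellaneous import ifelse`
        # (the exact import paths are assumptions, not shown in this snippet).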

        # Types of utterances
        trues, falses, others = self.partition_utterances(
            udi.possible_utterances, udi.context)
        #print "T:", trues
        #print "F:", falses
        #print "U:", others
        u = udi.utterance

        # Total weights over each utterance class (met = trues + falses).
        all_weights = sum(self.weightfunction(x, udi.context)
                          for x in udi.possible_utterances)
        true_weights = sum(self.weightfunction(x, udi.context) for x in trues)
        met_weights = sum(self.weightfunction(x, udi.context)
                          for x in falses) + true_weights

        w = self.weightfunction(u, udi.context)  # weight of the observed utterance
        if u in trues:
            # Mixture: speak truthfully (alpha), say anything presupposition-met
            # (1 - alpha), or ignore the context entirely (1 - palpha).
            p = (self.palpha * self.alpha * w / true_weights
                 + self.palpha * (1.0 - self.alpha) * w / met_weights
                 + (1.0 - self.palpha) * w / all_weights)
        elif u in falses:
            # Choose from the presupposition-met utterances; if nothing is true,
            # all of the cooperative mass goes to the falses.
            p = (ifelse(true_weights == 0, 1.0, 1.0 - self.alpha)
                 * self.palpha * w / met_weights
                 + (1.0 - self.palpha) * w / all_weights)
        else:
            # Choose from all utterances; if nothing is presupposition-met,
            # all of the mass goes here.
            p = ifelse(met_weights == 0, 1.0, 1.0 - self.palpha) * w / all_weights
        """
        TODO: WHY NOT THIS WAY, IGNORING tre_weights==0? Because if we sample, then we have 0 chance of getting a true when true_weights is like that. This causes problems in CCGLexicon
        w = self.weightfunction(u, udi.context) # the current word weight
        if   (u in trues):  p = self.palpha * (self.alpha * w / true_weights + (1.0 - self.alpha) * w / met_weights) + (1.0 - self.palpha) * w / all_weights # choose from the trues
        elif (u in falses): p = self.palpha * (1.0-self.alpha) * w / met_weights + (1.0 - self.palpha) * w / all_weights # choose from the trues
        else:               p = (1.0 - self.palpha) * w / all_weights
        """

        return log(p)
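Taken together, the three branches form a two-stage mixture: with probability palpha the speaker respects the context (and within that speaks truly with probability alpha, or says anything presupposition-met otherwise), while with probability 1 - palpha the speaker picks from all utterances by weight. Below is a minimal numeric sketch of the `u in trues` branch; every value is invented for illustration:

from math import log

palpha, alpha = 0.9, 0.9   # hypothetical parameter settings
w = 2.0                    # weight of the observed (true) utterance
true_weights = 5.0         # total weight of the true utterances
met_weights = 8.0          # total weight of the presupposition-met utterances
all_weights = 10.0         # total weight of all possible utterances

# The same mixture as the `u in trues` branch above:
p = (palpha * alpha * w / true_weights           # truthful, weighted choice
     + palpha * (1.0 - alpha) * w / met_weights  # any presupposition-met choice
     + (1.0 - palpha) * w / all_weights)         # context-blind choice
print(log(p))  # log(0.3665) ~= -1.0036, the single-datum log-likelihood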
Example #2
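# `ifelse` is assumed to come from LOTlib.Miscellaneous, matching the style of
# the snippet above (an assumption; the import is not shown in the source).
from LOTlib.Miscellaneous import ifelse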
def gricean_weight(h, testing_set, nu=1.0):
    """Takes a hypothesis and its function and returns the weight under a gricean setup.

    Production probability is proportional to:  exp( 1.0 / (nu + proportionoftimeitistrue) )

    Notes:
        The max weight is 1/nu, and this should not be huge compared to 1/alpha
        We (should) boundedly memoize this

    """

    # Proportion of contexts in the testing set in which h evaluates to true.
    pct = float(sum(map(lambda s: ifelse(h(s), 1.0, 0.0),
                        testing_set))) / len(testing_set)
    # Alternatives that pull out the context sets and apply f directly:
    # pct = float(sum(map(lambda s: ifelse(f(*s) is True, 1.0, 0.0), testing_set))) / len(testing_set)
    # pct = float(sum(map(lambda s: ifelse(collapse_undef(f(*s)), 1.0, 0.0), testing_set))) / len(testing_set)

    return 1.0 / (nu + pct)
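As a quick sanity check of the weight formula, here is a hypothetical hypothesis and testing set (both invented; in the source these would come from the surrounding codebase):

def toy_hypothesis(context):
    return context > 0  # "true" exactly in contexts with a positive value

toy_testing_set = [-2, -1, 0, 1, 2]  # hypothetical contexts
# h is true in 2 of 5 contexts, so pct = 0.4 and the weight is 1.0 / (1.0 + 0.4).
print(gricean_weight(toy_hypothesis, toy_testing_set))  # ~0.714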