def compute_likelihood(self, data, **kwargs):
    """Compute and store the log likelihood of ``data``.

    Each datum is scored as a noisy-choice mixture: with probability
    ``self.alpha`` the (word, X, Y) triple is drawn uniformly from the set
    of triples the hypothesis makes true in the datum's context; with
    probability ``1 - self.alpha`` it is uniform noise over all
    ``len(all_words) * len(objects)**2`` possible triples.

    Per-context constants are cached so repeated contexts are not
    recomputed. A RecursionDepthException sets the likelihood to
    -Infinity. Updates ``self.likelihood`` and the posterior, and returns
    the (temperature-scaled) likelihood.
    """
    cache = {}  # context -> (trueset, all_poss); idiomatic: no `.keys()` scan
    ll = 0
    for datum in data:
        context = datum.context
        if context in cache:
            trueset, all_poss = cache[context]
        else:
            try:
                if context.ego is None:
                    trueset = self.make_true_data(context)
                else:
                    # An ego-centric context fixes the speaker X to the ego.
                    trueset = self.make_true_data(context, fixX=context.ego)
                all_poss = len(self.all_words()) * len(context.objects) ** 2
                cache[context] = (trueset, all_poss)
            except RecursionDepthException:
                self.likelihood = -Infinity
                self.update_posterior()
                return self.likelihood

        if (datum.word, datum.X, datum.Y) in trueset:
            ll += log(self.alpha / len(trueset) + (1. - self.alpha) / all_poss)
        else:
            ll += log((1. - self.alpha) / all_poss)

    self.likelihood = ll / self.likelihood_temperature
    self.update_posterior()
    return self.likelihood
def compute_Zipf_likelihood(lexicon, data, s):
    """Compute the log likelihood of ``data`` under a Zipfian referent model.

    With probability ``lexicon.alpha`` the datum is generated by the model
    (referent Zipf-distributed with exponent ``s``, normalized by
    ``margin``); otherwise it is uniform noise over all
    ``len(context.objects)**2`` (X, Y) pairs. Per-context constants are
    cached. Returns -Infinity on a RecursionDepthException.

    Fix: ``range`` replaces the Python-2-only ``xrange``.
    """
    cache = {}  # context -> (trueset, all_poss, margin)
    ll = 0
    for datum in data:
        context = datum.context
        if context in cache:
            trueset, all_poss, margin = cache[context]
        else:
            try:
                if context.ego is None:
                    trueset = {w: lexicon.make_word_data(w, context)
                               for w in lexicon.all_words()}
                else:
                    trueset = {w: lexicon.make_word_data(w, context,
                                                         fixX=context.ego)
                               for w in lexicon.all_words()}
                all_poss = len(context.objects) ** 2
                # NOTE(review): `trueset` is a dict, so this iterates its
                # *keys* (words), making t[1] a word's second character —
                # confirm this is the intended "speakers" set (the sibling
                # non-dict variants take t[1] from (word, X, Y) triples).
                all_poss_speakers = {t[1] for t in trueset}
                # Normalizer of the generalized-Zipf distribution.
                margin = float(sum(
                    Fraction(1, d) ** s
                    for d in range(1, len(all_poss_speakers) + 1)))
                cache[context] = (trueset, all_poss, margin)
            except RecursionDepthException:
                return -Infinity

        if (datum.word, datum.X, datum.Y) in trueset[datum.word]:
            pS = (context.distance[datum.Y] ** -s) / margin
            pRgS = (context.distance[datum.Y] ** -s) / sum(
                context.distance[ref] ** -s
                for ref in lexicon(datum.word, context, set([datum.X])))
            ll += log(lexicon.alpha * pS * pRgS
                      + (1. - lexicon.alpha) / all_poss)
        else:
            ll += log((1. - lexicon.alpha) / all_poss)

    return ll / lexicon.likelihood_temperature
def compute_likelihood(self, data, **kwargs):
    """Zipfian log likelihood assuming all data share a single context.

    Mixture of a model-generated reading (speaker/referent Zipf-distributed
    with exponent ``self.s``) and uniform noise over all triples. Updates
    ``self.likelihood`` and the posterior, and returns the likelihood.

    Fix: ``range`` replaces the Python-2-only ``xrange``.
    """
    ll = 0
    # HACK (from original): the same context is assumed for all data.
    context = data[0].context
    trueset = self.make_true_data(context)
    all_poss = len(self.all_words()) * len(context.objects) ** 2
    all_poss_speakers = {t[1] for t in trueset}
    # Normalizer of the generalized-Zipf distribution over speakers.
    margin = float(sum(Fraction(1, d) ** self.s
                       for d in range(1, len(all_poss_speakers) + 1)))
    for datum in data:
        if (datum.word, datum.X, datum.Y) in trueset:
            pS = (context.distance[datum.Y] ** -self.s) / margin
            pRgS = (context.distance[datum.Y] ** -self.s) / sum(
                context.distance[ref] ** -self.s
                for ref in self(self.words[0], context, set([datum.X])))
            ll += log(self.alpha * pS * pRgS + (1. - self.alpha) / all_poss)
        else:
            ll += log((1. - self.alpha) / all_poss)
    self.likelihood = ll / self.likelihood_temperature
    self.update_posterior()
    return self.likelihood
def compute_single_likelihood(self, datum):
    """Compute the log likelihood of a single datum.

    The hypothesis (``self``) is applied to ``datum.input``. An "undef" or
    None response is scored as a uniform draw from a 10-outcome base
    distribution; otherwise the score mixes base noise with an
    ``datum.alpha``-weighted indicator that the response matches
    ``datum.output``.

    TODO: Make sure this precisely matches the number paper.

    Fix: ``is None`` replaces the identity-unsafe ``== None``.
    """
    response = self(*datum.input)
    if response == "undef" or response is None:
        # If undefined, just sample from the base distribution.
        return log(1.0 / 10.0)
    return log((1.0 - datum.alpha) / 10.0
               + datum.alpha * (response == datum.output))
def compute_single_likelihood(self, datum):
    """Return the log probability of one datum under this hypothesis.

    TODO: Make sure this precisely matches the number paper.
    """
    outcome = self(*datum.input)
    # An undefined evaluation falls back to a uniform base distribution
    # over 10 outcomes.
    if outcome == 'undef' or outcome == None:
        return log(1.0 / 10.0)
    matched = (outcome == datum.output)
    return log((1.0 - datum.alpha) / 10.0 + datum.alpha * matched)
def compute_L_likelihood(self, data, eval=False, **kwargs):
    """Compute the log likelihood of ``data`` under a uniform-true model.

    Each point's probability is ``(1 - alpha)/len(objects)`` noise, plus
    ``alpha/len(trueset)`` if its triple is true. Recursion
    well-formedness is checked once (skipped when ``eval`` is True).
    Updates ``self.likelihood``/posterior and returns the likelihood;
    -Infinity on RecursionDepthException or malformed recursion.

    Fix: the cached-constants branch previously read index 3 of a
    two-element cache entry, raising IndexError whenever a context
    repeated; it now reads the correct slot.
    """
    cache = {}  # context -> (trueset, all_poss)
    ll = 0
    for di, datum in enumerate(data):
        context = datum.context
        if context in cache:
            trueset, all_poss = cache[context]
        else:
            try:
                trueset = self.make_true_data(context)
                all_poss = len(context.objects)
                cache[context] = (trueset, all_poss)
            except RecursionDepthException:
                self.likelihood = -Infinity
                self.update_posterior()
                return self.likelihood

        # Make sure recursion is well formed (checked on the first datum).
        if di == 0 and not eval and not self.canIrecurse(data, trueset):
            self.likelihood = -Infinity
            self.update_posterior()
            return self.likelihood

        # Single-point likelihood: noise mass plus (if true) uniform choice
        # from the true set.
        p = (1. - self.alpha) / all_poss
        if (datum.word, datum.X, datum.Y) in trueset:
            p += self.alpha / len(trueset)
        ll += log(p)

    self.likelihood = ll / self.likelihood_temperature
    self.update_posterior()
    return self.likelihood
def compute_Zipf_likelihood(lexicon, data, s):
    """Log likelihood of ``data`` with Zipf-distributed (exponent ``s``)
    speakers/referents; uniform noise otherwise.

    Caches per-context constants; returns -Infinity on a
    RecursionDepthException. Fix: ``range`` replaces the Python-2-only
    ``xrange``.
    """
    per_context = {}  # context -> (word_truesets, n_pairs, zipf_norm)
    total = 0
    for datum in data:
        ctx = datum.context
        if ctx in per_context:
            word_truesets, n_pairs, zipf_norm = per_context[ctx]
        else:
            try:
                if ctx.ego is None:
                    word_truesets = {w: lexicon.make_word_data(w, ctx)
                                     for w in lexicon.all_words()}
                else:
                    word_truesets = {w: lexicon.make_word_data(w, ctx,
                                                               fixX=ctx.ego)
                                     for w in lexicon.all_words()}
                n_pairs = len(ctx.objects) ** 2
                # NOTE(review): iterating a dict yields its *keys* (words),
                # so t[1] is a word's second character — confirm against the
                # non-dict variants where t[1] is the speaker X.
                speakers = {t[1] for t in word_truesets}
                zipf_norm = float(sum(Fraction(1, rank) ** s
                                      for rank in range(1, len(speakers) + 1)))
                per_context[ctx] = (word_truesets, n_pairs, zipf_norm)
            except RecursionDepthException:
                return -Infinity

        noise = (1. - lexicon.alpha) / n_pairs
        if (datum.word, datum.X, datum.Y) in word_truesets[datum.word]:
            p_speaker = (ctx.distance[datum.Y] ** -s) / zipf_norm
            denom = sum(ctx.distance[ref] ** -s
                        for ref in lexicon(datum.word, ctx, set([datum.X])))
            p_ref = (ctx.distance[datum.Y] ** -s) / denom
            total += log(lexicon.alpha * p_speaker * p_ref + noise)
        else:
            total += log(noise)

    return total / lexicon.likelihood_temperature
def compute_word_ll(word, h, data):
    """Log likelihood of the observations of ``word`` in ``data`` under
    hypothesis ``h``.

    Datapoints for other words are ignored; an empty selection scores 0.
    Relies on the module-level ``options.alpha`` noise parameter, and
    assumes all datapoints for the word share a single context.
    """
    data = [dp for dp in data if dp.word == word]
    if not data:  # idiomatic emptiness test
        return 0
    context = data[0].context
    # All (word, speaker, referent) triples the hypothesis makes true.
    trueset = {(word, x, y)
               for x in context.objects
               for y in h('', context, set([x]))}  # x must be a set here
    all_poss = len(context.objects) ** 2
    ll = 0
    for datum in data:
        if (datum.word, datum.X, datum.Y) in trueset:
            ll += log(options.alpha / len(trueset)
                      + (1. - options.alpha) / all_poss)
        else:
            ll += log((1. - options.alpha) / all_poss)
    return ll
def compute_likelihood(self, data, **kwargs):
    """Noisy-uniform log likelihood with ego-centric contexts and a
    well-formed-recursion check.

    With probability ``alpha`` the triple is uniform over the true set;
    otherwise it is uniform noise. Per-context constants are cached;
    -Infinity on RecursionDepthException or when ``canIrecurse`` fails.
    Updates ``self.likelihood``/posterior and returns the likelihood.
    """
    cache = {}  # context -> (trueset, all_poss); idiomatic: no `.keys()`
    ll = 0
    for di, datum in enumerate(data):
        context = datum.context
        if context in cache:
            trueset, all_poss = cache[context]
        else:
            try:
                if context.ego is None:
                    trueset = self.make_true_data(context)
                    all_poss = (len(self.all_words())
                                * len(context.objects) ** 2)
                else:
                    trueset = self.make_true_data(context, fixX=context.ego)
                    # NOTE(review): not squared here — with X fixed to the
                    # ego only words * objects triples remain; confirm this
                    # asymmetry with the non-ego branch is intended.
                    all_poss = len(self.all_words()) * len(context.objects)
                cache[context] = (trueset, all_poss)
            except RecursionDepthException:
                self.likelihood = -Infinity
                self.update_posterior()
                return self.likelihood

        # Check once whether recursion is well formed; bail out if not.
        if di == 0 and not self.canIrecurse(data, trueset):
            self.likelihood = -Infinity
            self.update_posterior()
            return self.likelihood

        if (datum.word, datum.X, datum.Y) in trueset:
            ll += log(self.alpha / len(trueset) + (1. - self.alpha) / all_poss)
        else:
            ll += log((1. - self.alpha) / all_poss)

    self.likelihood = ll / self.likelihood_temperature
    self.update_posterior()
    return self.likelihood
def compute_likelihood(self, s, data, word):
    """Zipfian log likelihood for a single ``word`` with exponent ``s``.

    Assumes all data share one context (original HACK). Mixture of a
    model-generated reading (referent Zipf-distributed, normalized by
    ``margin``) and uniform noise. Updates ``self.likelihood``/posterior
    and returns the likelihood.

    Fix: ``range`` replaces the Python-2-only ``xrange``.
    """
    ll = 0
    context = data[0].context  # HACK: same context for all likelihood
    trueset = self.make_true_data(context)
    all_poss = len(self.all_words()) * len(context.objects) ** 2
    all_poss_speakers = {t[1] for t in trueset}
    # Normalizer of the generalized-Zipf distribution over speakers.
    margin = float(sum(Fraction(1, d) ** s
                       for d in range(1, len(all_poss_speakers) + 1)))
    for datum in data:
        if (datum.word, datum.X, datum.Y) in trueset:
            pS = (context.distance[datum.Y] ** -s) / margin
            pRgS = (context.distance[datum.Y] ** -s) / sum(
                context.distance[ref] ** -s
                for ref in self(word, context, set([datum.X])))
            ll += log(self.alpha * pS * pRgS + (1. - self.alpha) / all_poss)
        else:
            ll += log((1. - self.alpha) / all_poss)
    self.likelihood = ll / self.likelihood_temperature
    self.update_posterior()
    return self.likelihood
def compute_single_likelihood(self, udi):
    """Compute the log likelihood of a single utterance datum ``udi``.

    Possible utterances are partitioned into true, false, and other
    (undefined); the observed utterance is scored as a palpha/alpha
    mixture over weighted choices from those pools.
    """
    assert isinstance(udi, UtteranceData)

    # Partition the candidate utterances by truth value in this context.
    trues, falses, others = self.partition_utterances(
        udi.possible_utterances, udi.context)

    utterance = udi.utterance

    def weight_of(u):
        return self.weightfunction(u, udi.context)

    # Total weight mass of each candidate pool.
    all_weights = sum(weight_of(u) for u in udi.possible_utterances)
    true_weights = sum(weight_of(u) for u in trues)
    met_weights = sum(weight_of(u) for u in falses) + true_weights

    w = weight_of(utterance)  # weight of the observed utterance

    if utterance in trues:
        # Truthful choice, met-but-false choice, or anything at all.
        p = (self.palpha * self.alpha * w / true_weights
             + self.palpha * (1.0 - self.alpha) * w / met_weights
             + (1.0 - self.palpha) * w / all_weights)
    elif utterance in falses:
        p = (ifelse(true_weights == 0, 1.0, 1.0 - self.alpha)
             * self.palpha * w / met_weights
             + (1.0 - self.palpha) * w / all_weights)
    else:
        p = ifelse(met_weights == 0, 1.0, (1.0 - self.palpha)) * w / all_weights

    # NOTE (from original TODO): true_weights == 0 is special-cased because
    # a sampler would otherwise have zero probability of ever producing a
    # true utterance, which causes problems in CCGLexicon.
    return log(p)
def compute_single_likelihood(self, udi):
    """Log likelihood of one UtteranceData point under the
    palpha/alpha mixture over true / false-but-met / all utterances."""
    assert isinstance(udi, UtteranceData)

    trues, falses, others = self.partition_utterances(
        udi.possible_utterances, udi.context)

    u = udi.utterance
    ctx = udi.context

    # Weight mass of each candidate pool.
    all_w = sum(self.weightfunction(c, ctx) for c in udi.possible_utterances)
    true_w = sum(self.weightfunction(c, ctx) for c in trues)
    met_w = sum(self.weightfunction(c, ctx) for c in falses) + true_w

    w = self.weightfunction(u, ctx)  # the current word weight

    if u in trues:
        p = (self.palpha * self.alpha * w / true_w
             + self.palpha * (1.0 - self.alpha) * w / met_w
             + (1.0 - self.palpha) * w / all_w)
    elif u in falses:
        # If nothing is true, the alpha mass falls back onto the met pool.
        true_factor = ifelse(true_w == 0, 1.0, 1.0 - self.alpha)
        p = true_factor * self.palpha * w / met_w + (1.0 - self.palpha) * w / all_w
    else:
        p = ifelse(met_w == 0, 1.0, (1.0 - self.palpha)) * w / all_w

    # NOTE (from original TODO): dropping the true_weights == 0 special case
    # would give a sampler zero chance of a true utterance, which breaks
    # CCGLexicon.
    return log(p)
def compute_likelihood(self, data, eval=False, **kwargs):
    """Log likelihood mixing speaker-centric and ego-centric true readings.

    Each datum's probability is the sum of:
      * uniform noise ``(1 - alpha)/len(objects)``,
      * a speaker-centric true reading (weight ``alpha * (1 - epsilon)``)
        with the referent Zipf-distributed among the word's referents, and
      * an ego-centric true reading (weight ``alpha * epsilon``) with the
        referent Zipf-distributed, normalized per word.

    Per-context constants are cached. A RecursionDepthException, or a
    malformed recursion (checked once via ``canIrecurse`` unless ``eval``),
    yields -Infinity. Updates ``self.likelihood``/posterior.
    """
    cache = {}
    ll = 0
    for di, datum in enumerate(data):
        ctx = datum.context
        if ctx in cache:
            trueset, egoset, ego_ref, all_poss = cache[ctx]
        else:
            try:
                trueset = self.make_true_data(ctx)
                egoset = self.make_true_data(ctx, fixX=ctx.ego)
                # Total Zipf mass of each word's referents when X is the ego.
                ego_ref = {}
                for w in self.all_words():
                    refs = [t[2] for t in
                            self.make_word_data(w, ctx, fixX=ctx.ego)]
                    ego_ref[w] = sum(zipf(r, self.s, ctx, len(ctx.objects))
                                     for r in refs)
                all_poss = len(ctx.objects)
                cache[ctx] = [trueset, egoset, ego_ref, all_poss]
            except RecursionDepthException:
                self.likelihood = -Infinity
                self.update_posterior()
                return self.likelihood

        # Recursion well-formedness is checked once, on the first datum.
        if di == 0 and not eval and not self.canIrecurse(data, trueset):
            self.likelihood = -Infinity
            self.update_posterior()
            return self.likelihood

        # Baseline: uniform noise over the context's objects.
        p = (1. - self.alpha) / all_poss

        triple = (datum.word, datum.X, datum.Y)
        if triple in trueset:
            # Probability it's true and speaker-centric.
            pT = self.alpha * (1. - self.epsilon)
            pr = zipf(datum.Y, self.s, ctx, len(ctx.objects))
            candidates = self(datum.word, ctx, set([datum.X]))
            candidates.discard(datum.X)  # speaker cannot be the referent
            Z = sum(zipf(r, self.s, ctx, len(ctx.objects))
                    for r in candidates)
            p += pT * (pr / Z)
        if triple in egoset:
            # Probability it's true and ego-centric.
            pT = self.alpha * self.epsilon
            pR = (zipf(datum.Y, self.s, ctx, len(ctx.objects))
                  / ego_ref[datum.word])
            p += pT * pR

        ll += log(p)

    self.likelihood = ll / self.likelihood_temperature
    self.update_posterior()
    return self.likelihood