# Helpers assumed to be imported elsewhere in the project: flip, sample1, and
# weighted_sample (LOTlib.Miscellaneous-style samplers), the project's zipf()
# weighting function, and the KinshipData container.

def makeVariableLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False):
    data = []
    true_set = lexicon.make_true_data(context)
    all_poss_speakers = [t[1] for t in true_set]
    p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers]
    for i in xrange(n):
        if flip(alpha):
            # True data: Zipf-weighted speaker, then a Zipf-weighted referent
            # from that speaker's extension of the word
            speaker = weighted_sample(all_poss_speakers, probs=p)
            referents = lexicon(word, context, set([speaker]))
            p1 = [zipf(t, s, context, len(context.objects)) for t in referents]
            referent = weighted_sample(referents, probs=p1)
            if verbose:
                print "True data:", i, word, speaker, referent
            data.append(KinshipData(word, speaker, referent, context))
        else:
            # Noise data: a uniformly sampled (speaker, referent) pair
            x = sample1(context.objects)
            y = sample1(context.objects)
            if verbose:
                print "Noise data:", i, word, x, y
            data.append(KinshipData(word, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
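# A minimal usage sketch (illustrative; `target` and `ctx` stand for a lexicon
# and context built elsewhere in the project, and 'uncle' is a hypothetical
# word in that lexicon):
#
#   data = makeVariableLexiconData(target, 'uncle', ctx, n=50, verbose=True)
#
# This yields 50 KinshipData(word, speaker, referent, context) points: with
# probability alpha a true (speaker, referent) pair for 'uncle', otherwise a
# uniformly sampled noise pair.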
def makeZipfianLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False):
    # TODO remove word param from Shift files
    data = []
    true_set = lexicon.make_true_data(context)
    all_poss_speakers = [t[1] for t in true_set]
    p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers]
    for i in xrange(n):
        if flip(alpha):
            # True data: pick a Zipf-weighted speaker, then a Zipf-weighted
            # referent from the union of all words' extensions for that
            # speaker, then a word that truly applies to that referent.
            speaker = weighted_sample(all_poss_speakers, probs=p)
            bagR = {w: lexicon(w, context, set([speaker])) for w in lexicon.all_words()}
            uniqR = []
            for w in lexicon.all_words():
                # NB: extensions are concatenated, so a referent named by
                # several words gets proportionally more weight
                uniqR.extend(bagR[w])
            p1 = [zipf(t, s, context, len(context.objects)) for t in uniqR]
            referent = weighted_sample(uniqR, probs=p1)
            word = sample1([w for w in lexicon.all_words() if referent in bagR[w]])
            if verbose:
                print "True data:", i, word, speaker, referent
            data.append(KinshipData(word, speaker, referent, context))
        else:
            # Noise data: uniform word and a uniform (speaker, referent) pair
            word = sample1(lexicon.all_words())
            x = sample1(context.objects)
            y = sample1(context.objects)
            if verbose:
                print "Noise data:", i, word, x, y
            data.append(KinshipData(word, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
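# For intuition, a minimal, self-contained sketch of the Zipfian weighting the
# functions above rely on. It assumes zipf(t, s, ...) returns a weight
# proportional to rank**-s for some ranking of people (e.g. distance from ego)
# and that weighted_sample() draws proportionally to such weights. The function
# below is illustrative only, uses list position as a stand-in rank, and is not
# part of the project's API.
def _zipf_sample_sketch(items, s=1.0):
    import random
    # Zipf over ranks 1..N: weight(rank) = rank**-s
    weights = [1.0 / (r + 1) ** s for r in range(len(items))]
    # inverse-CDF draw, analogous to weighted_sample(items, probs=weights)
    u, acc = random.uniform(0, sum(weights)), 0.0
    for item, w in zip(items, weights):
        acc += w
        if u <= acc:
            return item
    return items[-1]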
def makeZipfianLexiconData(lexicon, context, dfreq=None, n=100, s=1.0, alpha=0.9,
                           epsilon=0.8, verbose=False):
    '''
    Generate data from the target lexicon as:

        P(W) * [ epsilon * P(S|W) P(R|W)  +  (1 - epsilon) * P(S|W) P(R|S,W) ]

    where
        P(W)     ~ dfreq, or uniform if dfreq is None
        P(S|W)   ~ Zipf(s) over all speakers who can use that word
        P(R|W)   ~ Zipf(s) over all people the learner (ego) has a word for
        P(R|S,W) ~ Zipf(s) over all referents the speaker can use the word for

    :param lexicon: the target lexicon
    :param context: the context
    :param dfreq: dictionary[word] = frequency weight (float)
    :param n: the number of data points
    :param s: the Zipfian exponent parameter
    :param alpha: the reliability parameter; noise = 1 - alpha
    :param epsilon: the ego-centric probability
    :param verbose: print the generated data points
    :return: list of KinshipData objects
    '''
    assert context.distance is not None, "There are no distances in the context!"
    if dfreq is not None:
        assert set(lexicon.all_words()).issubset(set(dfreq.keys())), \
            "Words in lexicon without frequencies"
        freq = lambda w: dfreq[w]
    else:
        freq = None
    data = []
    speakers = dict()
    egoRef = dict()
    for w in lexicon.all_words():
        speakers[w] = [t[1] for t in lexicon.make_word_data(w, context)]
        egoRef[w] = [t[2] for t in lexicon.make_word_data(w, context, fixX=context.ego)]
    for i in xrange(n):
        if flip(alpha):
            wrd = weighted_sample(lexicon.all_words(), probs=freq)
            speaker = weighted_sample(speakers[wrd],
                                      probs=lambda x: zipf(x, s, context, len(context.objects)))
            if flip(epsilon):
                # Ego-centric: referent drawn from ego's extension of the word
                referent = weighted_sample(egoRef[wrd],
                                           probs=lambda x: zipf(x, s, context, len(context.objects)))
                eps = 'Ego'
            else:
                # Speaker-centric: referent drawn from the speaker's extension
                referent = weighted_sample(lexicon(wrd, context, set([speaker])),
                                           probs=lambda x: zipf(x, s, context, len(context.objects)))
                eps = 'Speaker'
            if verbose:
                print "True data:", i, wrd, speaker, referent, eps
            data.append(KinshipData(wrd, speaker, referent, context))
        else:
            # Noise data: Zipf-weighted random (speaker, referent) pair for a
            # frequency-sampled word
            wrd = weighted_sample(lexicon.all_words(), probs=freq)
            x = weighted_sample(context.objects,
                                probs=lambda x: zipf(x, s, context, len(context.objects)))
            y = weighted_sample(context.objects,
                                probs=lambda x: zipf(x, s, context, len(context.objects)))
            if verbose:
                print "Noise data:", i, wrd, x, y
            data.append(KinshipData(wrd, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
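# A minimal usage sketch for the frequency-weighted version (illustrative;
# `target`, `ctx`, and the uniform `freqs` dict are hypothetical stand-ins):
#
#   freqs = {w: 1.0 for w in target.all_words()}   # e.g. corpus counts per word
#   data = makeZipfianLexiconData(target, ctx, dfreq=freqs, n=200,
#                                 s=1.0, alpha=0.9, epsilon=0.8, verbose=True)
#
# With alpha=0.9 and epsilon=0.8, roughly 90% of points are true data, and of
# those, ~80% draw the referent ego-centrically and ~20% from the speaker's
# own extension of the word.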