def compute_l1(self, load, save=True):
    """Run the L1 (listener) RSA inference selected by
    ``self.inference_params.model_type`` and store the result on
    ``self.tf_results``.

    Parameters
    ----------
    load, save : unused in this method as visible here; kept for
        interface compatibility with existing callers.

    Dispatch table (model_type -> implementation):
      - "discrete_exact"         -> tf_l1_discrete
      - "discrete_mixture"       -> tf_l1_mixture
      - "qud_only"               -> tf_l1_qud_only (baseline QUD-only model)
      - "numpy_discrete_mixture" -> np_l1_mixture
      - "baseline"               -> cosine-similarity baseline (no sampler run;
                                    results go on inference_params, tf_results
                                    is set to None)

    NOTE(review): an unrecognized model_type falls through silently and
    leaves ``self.tf_results`` untouched -- consider raising instead.
    """
    params = self.inference_params  # hoist the repeated attribute chain

    # Diagnostic dump of the key hyperparameters for this run.
    print("subject:", params.subject)
    print("predicate", params.predicate)
    print("SIGs 1&2:", params.sig1, params.sig2)
    print("L1 SIG", params.l1_sig1)
    print("number of qud dimensions:", params.number_of_qud_dimensions)
    print("rationality:", params.rationality)
    message = ("Running " + params.model_type + " RSA with "
               + str(len(params.possible_utterances))
               + " possible utterances and " + str(len(params.quds)))
    print(message)

    if params.model_type == "discrete_exact":
        print("RUNNING DISCRETE EXACT MODEL")
        self.tf_results = tf_l1_discrete(params)

    elif params.model_type == "discrete_mixture":
        print("RUNNING DISCRETE MIXTURE MODEL")
        self.tf_results = tf_l1_mixture(params)

    elif params.model_type == "qud_only":
        print("RUNNING QUD ONLY MODEL")
        self.tf_results = tf_l1_qud_only(params)
        print("results", self.tf_results)

    elif params.model_type == "numpy_discrete_mixture":
        print("RUNNING NUMPY DISCRETE MIXTURE MODEL")
        self.tf_results = np_l1_mixture(params)
        print("results", self.tf_results)

    elif params.model_type == "baseline":
        # No sampler: rank candidate QUD words by cosine closeness of their
        # vector to the midpoint of the subject and predicate vectors.
        print("RUNNING BASELINE MODEL")
        from dist_rsa.utils.load_data import get_words
        vecs = params.vecs
        subj = params.subject
        pred = params.predicate
        params.qud_combinations = [[q] for q in params.quds]
        nouns, adjs = get_words(with_freqs=False)
        # Candidate QUD words: adjectives with a truthy rating that have a
        # vector and differ from the predicate itself.
        qud_words = [a for a in adjs if adjs[a] if a in vecs and a != pred]
        # Midpoint of subject/predicate embeddings; subj is a list, so
        # subj[0] is the subject word. Hoisted out of the loops below.
        midpoint = np.mean([vecs[subj[0]], vecs[pred]], axis=0)
        # Negative cosine distance acts as a (higher-is-closer) score.
        # NOTE(review): the [:100] slice is taken BEFORE any sorting, so these
        # marginals do not line up with ordered_quds -- confirm intended.
        params.qud_marginals = [
            -scipy.spatial.distance.cosine(midpoint, vecs[qud])
            for qud in qud_words
        ][:100]
        params.ordered_quds = sorted(
            qud_words,
            key=lambda x: scipy.spatial.distance.cosine(midpoint, vecs[x]))
        self.tf_results = None
        print("results", self.tf_results)
from __future__ import division
from collections import defaultdict
import scipy
import numpy as np
import pickle
import itertools
from dist_rsa.dbm import *
from dist_rsa.utils.load_data import *
from dist_rsa.utils.helperfunctions import *
from dist_rsa.utils.load_data import get_words
from dist_rsa.lm_1b_eval import predict

# Module-level resources shared by the experiment functions below.
# 300-d GloVe vectors, mean-centred with top PCA dims removed (see load_vecs).
vecs = load_vecs(mean=True, pca=True, vec_length=300, vec_type='glove.6B.')
# nouns / adjs: word -> numeric rating dicts; presumably concreteness
# ratings, given the threshold comparisons below -- TODO confirm.
nouns, adjs = get_words()


def l1_cat_2d_exp(metaphor):
    """Set up a 2-d categorical L1 experiment for ``metaphor``.

    ``metaphor`` is a (subject, predicate) pair of words.

    NOTE(review): as visible here this function only computes ``qud_words``
    and ``sig2_distance`` and then returns None without using either --
    it looks truncated or abandoned mid-refactor.
    """
    vec_size, vec_kind = 25, 'glove.twitter.27B.'
    subj, pred = metaphor
    abstract_threshold = 2.5
    print('abstract_threshold', abstract_threshold)
    concrete_threshold = 3.0
    print('concrete_threshold', concrete_threshold)
    # Candidate QUD words: "abstract" adjectives (rating below threshold)
    # that actually have an embedding.
    qud_words = [
        a for a in list(adjs)
        if adjs[a] < abstract_threshold and a in vecs
    ]
    # Cosine distance between the subject and predicate embeddings.
    sig2_distance = scipy.spatial.distance.cosine(vecs[subj], vecs[pred])
    # prob_dict = get_freqs(preprocess=False)
def l1_iden_1d(metaphor):
    """Run a 1-d identity-QUD DistRSA inference for ``metaphor``.

    ``metaphor`` is a (subject, predicate) pair of words. Builds candidate
    QUDs and utterances by cosine closeness to the subject/predicate
    midpoint, runs ``DistRSAInference``, prints the QUD results, and
    returns ``run.qud_samples``.
    """
    # NOTE(review): this local `vecs` (300-d glove.6B) shadows the
    # module-level `vecs`, while the inference itself is configured for
    # 25-d glove.twitter vectors -- confirm the mismatch is intended.
    vecs = pickle.load(
        open("dist_rsa/data/word_vectors/glove.6B.mean_vecs300", 'rb'),
        encoding='latin1')
    nouns, adjs = get_words()
    vec_size, vec_kind = (25, 'glove.twitter.27B.')
    subj, pred = metaphor
    abstract_threshold = 2.5
    print('abstract_threshold', abstract_threshold)
    concrete_threshold = 3.0
    print('concrete_threshold', concrete_threshold)
    # Candidate QUDs: nouns rated < 4.0 that have an embedding, ranked by
    # cosine closeness to the subject/predicate midpoint (closest first).
    qud_words = [n for n in nouns if nouns[n] < 4.0 and n in vecs]
    qud_words = sorted(
        qud_words,
        key=lambda x: scipy.spatial.distance.cosine(
            vecs[x], np.mean([vecs[subj], vecs[pred]], axis=0)),
        reverse=False)
    # print(quds)
    # break
    quds = qud_words[:30]
    # prob_dict = predict(" ".join([subj, "are"]))
    # Candidate utterances: "concrete" adjectives (rating above threshold),
    # ranked by the same midpoint-closeness criterion.
    possible_utterance_nouns = sorted(
        [a for a in adjs if adjs[a] > concrete_threshold and a in vecs],
        # key=lambda x:prob_dict[x],reverse=True)
        key=lambda x: scipy.spatial.distance.cosine(
            vecs[x], np.mean([vecs[subj], vecs[pred]], axis=0)),
        reverse=False)
    possible_utterances = possible_utterance_nouns[:30]
    print("QUDS:\n", quds[:20])
    print("UTTERANCES:\n", possible_utterances[:20])
    # Configure the full RSA inference; the predicate is always included
    # among the possible utterances.
    run = DistRSAInference(
        subject=[subj],
        predicate=pred,
        # possible_utterances=animals,
        # quds=animal_features,
        quds=quds,
        # quds = animal_features,
        # ['unyielding']+list(list(zip(*visualize_cosine(np.mean([vecs['man'],vecs[word]],axis=0),freq_sorted_adjs,vecs)[:500:10]))[0]),
        possible_utterances=list(set(possible_utterances).union(set([pred]))),
        # possible_utterances=
        # [noun for noun in nouns if noun not in adjectives][:100]+[adj for adj in adjectives if adj not in nouns][:100]+[pred],
        # sorted_nouns[sorted_nouns.index(pred) if pred in sorted_nouns else 500]+['horse'],
        object_name="animals_spec",
        mean_vecs=True,
        pca_remove_top_dims=True,
        sig1=0.0005,
        sig2=0.01,
        # proposed_sig2*scaling_factor,
        qud_weight=0.0,
        freq_weight=0.0,
        categorical="categorical",
        vec_length=vec_size,
        vec_type=vec_kind,
        sample_number=400,
        number_of_qud_dimensions=1,
        burn_in=285,
        seed=False,
        trivial_qud_prior=True,
        step_size=0.0005,
        # uniform word/QUD frequencies
        frequencies=defaultdict(lambda: 1),
        qud_frequencies=defaultdict(lambda: 1),
        qud_prior_weight=0.9,
        rationality=1.0,
        run_s2=False,
        speaker_world=vecs[subj],
        s1_only=False,
        norm_vectors=True)
    # PCA+mean-processed vectors matching the inference's vec_kind/vec_size;
    # loaded for the (currently commented-out) inspection code below.
    real_vecs = pickle.load(
        open("dist_rsa/data/word_vectors/" + vec_kind + "pca and mean"
             + str(vec_size), 'rb'),
        encoding='latin1')
    # print(real_vecs[subj],real_vecs[pred])
    run.compute_results(load=0, save=False)
    # print(run.world_movement("cosine",do_projection=True,comparanda=[x for x in abstract_adjs+abstract_nouns if x in real_vecs]))
    print(run.qud_results())
    # print("QUDS:\n",results[:20])
    # print("WORLD MOVEMENT\n:",run.world_movement("cosine",comparanda=[x for x in quds if x in real_vecs])[:50])
    # print("WORLD MOVEMENT\n:",run.world_movement("euclidean",comparanda=[x for x in quds if x in real_vecs])[:50])
    # print("BASELINE:\n:",run.baseline_model('mean')[:20])
    return run.qud_samples