class WeightedAdditiveModel(AdditiveModel): weighted_additive = None new_space = None def __init__(self, space, alpha=None, beta=None, no_diff=False): AdditiveModel.__init__(self, space, no_diff=no_diff) self.weighted_additive = WeightedAdditive(alpha=alpha, beta=beta) def fit(self, train_pairs, verbose=False): AdditiveModel.fit(self, train_pairs, verbose=verbose) if verbose: print 'fit: Fitting a weighted additive model on %d pairs' % (len(train_pairs)) # First, we embed the derived vector into the original space (by simply adding a row) vec_space = Space(self.diff_vector, ['pattern_vector'], []) self.new_space = Space.vstack(self.space, vec_space) # class is designed to be run on a dataset with different function words (==patterns). # We use a dummy function word here. train_pairs_ext = [(base, 'pattern_vector', derived) for (base, derived) in train_pairs] self.weighted_additive.train(train_pairs_ext, self.new_space, self.new_space) def predict(self, base, verbose=False): if self.weighted_additive is None: raise NameError('Error: Model has not yet been trained') composed_space = self.weighted_additive.compose([(base, 'pattern_vector', 'derived')], self.new_space) return composed_space.get_row('derived')
def test_space_train(self):
    """WeightedAdditive.train must recover the expected alpha/beta weights,
    and composing the training triples must reproduce the phrase space
    (row/column indices and co-occurrence values)."""
    test_cases = [
        ([("a", "b", "a_b"), ("a", "a", "a_a")],
         self.space1,
         Space(DenseMatrix(np.mat([[12,3],[6,2]])), ["a_b", "a_a"], ["f1", "f2"]),
         1, 1),
        ([("a", "b", "a_b"), ("a", "a", "a_a")],
         self.space1,
         Space(DenseMatrix(np.mat([[0,0],[0,0]])), ["a_b", "a_a"], ["f1", "f2"]),
         0, 0),
        ([("a", "b", "a_b"), ("a", "a", "a_a")],
         self.space1,
         Space(DenseMatrix(np.mat([[0,0],[0,0]])), ["a_b", "a_a"], []),
         0, 0),
        ([("a", "b", "a_b")],
         self.space1,
         Space(DenseMatrix(np.mat([[21,5]])), ["a_b"], []),
         1, 2),
        # Triples mentioning the unknown word "bla" must not change the
        # estimated weights (same expectation as the single clean pair).
        ([("a", "b", "a_b"), ("bla", "b", "a_b"), ("a", "bla", "a_b")],
         self.space1,
         Space(DenseMatrix(np.mat([[21,5]])), ["a_b"], []),
         1, 2),
    ]

    for train_data, arg_space, expected_space, exp_alpha, exp_beta in test_cases:
        wa_model = WeightedAdditive()
        wa_model.train(train_data, arg_space, expected_space)

        # Estimated weights match the analytic solution.
        self.assertAlmostEqual(wa_model.alpha, exp_alpha, 7)
        self.assertAlmostEqual(wa_model.beta, exp_beta, 7)

        # Composing the same triples reproduces the phrase space exactly.
        result = wa_model.compose(train_data, arg_space)
        self.assertListEqual(result.id2row, expected_space.id2row)
        self.assertDictEqual(result.row2id, expected_space.row2id)
        self.assertListEqual(result.id2column, expected_space.id2column)
        self.assertDictEqual(result.column2id, expected_space.column2id)
        np.testing.assert_array_almost_equal(result.cooccurrence_matrix.mat,
                                             expected_space.cooccurrence_matrix.mat,
                                             8)
#ex10.py #------- from composes.utils import io_utils from composes.composition.weighted_additive import WeightedAdditive #load a space my_space = io_utils.load("./data/out/ex10.pkl") print my_space.id2row print my_space.cooccurrence_matrix # instantiate a weighted additive model my_comp = WeightedAdditive(alpha = 1, beta = 1) # use the model to compose words in my_space composed_space = my_comp.compose([("good", "book", "good_book"), ("good", "car", "good_car")], my_space) print composed_space.id2row print composed_space.cooccurrence_matrix #save the composed space io_utils.save(composed_space, "data/out/PHRASE_SS.ex10.pkl")
#compute multiplication/addition of a list of word pairs fname = sys.argv[2] word_pairs = io_utils.read_tuple_list(fname, fields=[0,1]) lengths=[] found=True for wp in word_pairs: try: v1=my_space.get_row(wp[0]) v2=my_space.get_row(wp[1]) except KeyError: #print wp[0],"or",wp[1],"not found" found=False if found: composed_space = add.compose([(wp[0], wp[1], "_composed_")], my_space) neighbours=composed_space.get_neighbours("_composed_", 10, CosSimilarity(),space2=my_space) print wp[0],wp[1] print neighbours density=0 for n in neighbours: density+=n[1] density=density/10 print "Density",density c=composed_space.get_row("_composed_") print "Norm ",c.norm() cos=composed_space.get_sim("_composed_",wp[1], CosSimilarity(), space2=my_space) print "Cos ",cos print "--" else: found=True
# NOTE(review): this chunk appears to begin inside a loop that splits recipe
# lines into `words` (the loop header is not visible here); indentation below
# is reconstructed -- confirm against the full file.
recipes[words[0]] = words[1:]
if len(words)-1 > max_size:
    max_size = len(words)-1

# Additive composition model used to fold ingredient vectors together.
WA = WeightedAdditive(alpha = 1, beta = 1)
last_space = None
number = count()  # unique ids for intermediate composed vectors

# Work from the longest recipes down to pairs: at each step, compose each
# recipe's last two remaining ingredients into a single vector.
for size in xrange(max_size,1,-1):
    # Recipes that currently have exactly `size` ingredients left.
    # NOTE(review): generator over `recipes` is consumed while the dict's
    # VALUES are mutated below (keys are untouched), so iteration is safe
    # but order-sensitive -- treat with care when refactoring.
    relevant = (rec for rec in recipes if len(recipes[rec]) == size)
    print(size)
    composition = []
    for recipe in relevant:
        old = recipes[recipe]
        if size == 2:
            # Final step: the composed vector carries the recipe's own name.
            name = recipe
        else:
            name = "comp_" + str(next(number))
        if old[-2] in stacked_space.id2row:
            # Compose the last two ingredients into `name` and shorten
            # the ingredient list accordingly.
            composition.append((old[-1],old[-2],name))
            recipes[recipe].pop(-1)
            recipes[recipe].pop(-1)
            recipes[recipe].append(name)
        else:
            # Second-to-last ingredient has no vector: drop it; the recipe
            # is revisited at a smaller `size` in a later pass.
            recipes[recipe].pop(-2)
    if composition:
        last_space = WA.compose(composition, stacked_space)
        if size != 2:
            # Stack intermediate vectors back into the space so later
            # passes can compose with them.
            stacked_space = Space.vstack(stacked_space, last_space)

# Persist only the final (size == 2) composition results.
io_utils.save(last_space, "recicomp.pkl")
########################################################################## from composes.utils import io_utils from composes.composition.weighted_additive import WeightedAdditive from composes.similarity.cos import CosSimilarity import sys pkl=sys.argv[1] base=sys.argv[2] minus=sys.argv[3] plus=sys.argv[4] space = io_utils.load(pkl) # instantiate an additive and subtractive model add = WeightedAdditive(alpha = 1, beta = 1) sub = WeightedAdditive(alpha = 1, beta = -1) #print space.get_neighbours(base, 10, CosSimilarity()) print "Subtracting",minus,"from",base composed_space = sub.compose([(base, minus, "step1")], space) #print composed_space.get_neighbours("step1", 10, CosSimilarity(),space) print "Adding",plus,"..." composed_space2 = add.compose([("step1", plus, "step2")], (composed_space,space)) print composed_space2.get_neighbours("step2", 10, CosSimilarity(),space)
    # NOTE(review): tail of a helper whose signature lies above this chunk
    # (collects composition triples); indentation reconstructed.
    els_for_comp.append(element)
    return els_for_comp

# Build the typicality (gold) and distributional spaces from dm/rows files.
typ_space = create_space(TypDmFile, TypRowsFile)
distr_space = create_space(DistrDmFile, DistrRowsFile)

#load a space from a pickle file
#my_space = io_utils.load("./sharp/lexfunc/lexfunc_Ridge_pract.pkl")

#distributional vectors processing
distr_space = distr_space.apply(PpmiWeighting())
distr_space = distr_space.apply(Svd(300))
#io_utils.save(distr_space, "./spaces/smooth_phrases_ppmi.pkl")

items = items_from_file(itemsFile)
els_for_comp = elements_for_composition(items)

# Compose phrases by (weighted) vector addition; the composed space
# replaces the plain distributional one for the evaluation below.
my_comp = WeightedAdditive(alpha=1, beta=1)
distr_space = my_comp.compose(els_for_comp, distr_space)

# NOTE(review): rebinds the name `pairs` (presumably a function defined
# above this chunk) to its own result -- works once but shadows the
# function; consider renaming the result variable.
pairs = pairs(items)

# Correlate model similarities against the gold typicality space.
predicted = distr_space.get_sims(pairs, CosSimilarity())
gold = typ_space.get_sims(pairs, CosSimilarity())

#compute correlations
print "Spearman"
print scoring_utils.score(gold, predicted, "spearman")
print "Pearson"
print scoring_utils.score(gold, predicted, "pearson")
#compute multiplication/addition of a list of word pairs fname = sys.argv[2] word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1]) lengths = [] found = True for wp in word_pairs: try: v1 = my_space.get_row(wp[0]) v2 = my_space.get_row(wp[1]) except KeyError: #print wp[0],"or",wp[1],"not found" found = False if found: composed_space = add.compose([(wp[0], wp[1], "_composed_")], my_space) neighbours = composed_space.get_neighbours("_composed_", 10, CosSimilarity(), space2=my_space) print wp[0], wp[1] print neighbours density = 0 for n in neighbours: density += n[1] density = density / 10 print "Density", density c = composed_space.get_row("_composed_") print "Norm ", c.norm() cos = composed_space.get_sim("_composed_", wp[1],
# format = "dm", # data ="SOME_PATH_FOR_A_WORD_TO_VEC_PERIPHERAL_SPACE_DATA" # ) # Debug # print space.cooccurrence_matrix # print space.id2row # instantiate a weighted additive model my_comp = WeightedAdditive(alpha = 1, beta = 1) # use the model to compose words in my_space composed_space = my_comp.compose([("good", "book", "good_book"), ("good", "car", "good_car")], space) print composed_space.id2row print composed_space.cooccurrence_matrix print composed_space.get_sims([("good_car","good_book")], CosSimilarity()) # Similarity metric #=============================================================================================================== print "="*80 #=============================================================================================================== ##Training Models ##Training Weight Additive Models #training data train_data = [("good", "car", "good_car"),
# Interactively read ingredient names, fold them pairwise into a single
# composed vector with the weighted additive model, then print the nearest
# recipe neighbours of the final composition.
ingredients = []
print("Enter ingredients, enter when done")
while True:
    ingredient = raw_input("> ").replace(" ","_")
    if ingredient == "":
        break
    if ingredient not in stacked.id2row:
        print("(not found, skipping)")
        continue
    ingredients.append(ingredient)

# BUGFIX: the composition loop pops two ingredients per step, so fewer than
# two known ingredients previously crashed with IndexError (and left
# `new_space`/`name` undefined for the vstack/similarity pass below).
if len(ingredients) < 2:
    print("Need at least two known ingredients to compose; exiting.")
    raise SystemExit(1)

name = ""
while True:
    # Compose the last two entries into a fresh intermediate vector.
    (a, b) = ingredients.pop(-1), ingredients.pop(-1)
    name = "comp_" + str(next(number))
    ingredients.append(name)
    new_space = WA.compose([(a, b, name)], stacked)
    if len(ingredients) > 1:
        # More ingredients remain: stack the intermediate vector back so
        # the next step can compose with it.
        stacked = Space.vstack(stacked, new_space)
    else:
        break

# Compare the final composition against the recipe space only (not the
# intermediate comp_* vectors accumulated above).
stacked = Space.vstack(recicomp, new_space)

top = []
for recipe in stacked.id2row:
    if recipe == name:
        continue
    sim = stacked.get_sim(recipe, name, CosSimilarity())
    ins(top, (sim, recipe))
print("Nearest neighbors:", ", ".join([x[1].replace("_"," ") + " (" + str(x[0]) + ")" for x in top]))