def add_zero_idenity_matrix(matrix_space, vector_length):
    """Extend a matrix space with flattened zero and identity matrices.

    Two rows are appended to `matrix_space`: "cg.zeromat" (an all-zeros
    vector_length x vector_length matrix, flattened) and "cg.identmat"
    (the flattened identity matrix of the same size).

    Returns the vertically stacked Space; `matrix_space` itself is not
    modified.
    """
    dim = vector_length * vector_length
    flat_zero = np.zeros((1, dim))
    flat_identity = np.eye(vector_length).reshape((1, dim))
    extra = Space(DenseMatrix(np.vstack([flat_zero, flat_identity])),
                  ["cg.zeromat", "cg.identmat"], [])
    return Space.vstack(matrix_space, extra)
def vstack(s1, s2):
    """Null-tolerant wrapper around Space.vstack.

    If either operand is falsy (e.g. None), the other one is returned
    unchanged; only when both are present does the call delegate to
    Space.vstack to stack them vertically.
    """
    if s1 and s2:
        return Space.vstack(s1, s2)
    return s2 if not s1 else s1
def add_one_zero_vector(core_space):
    """Extend a core space with constant zero and one vectors.

    Appends two rows matching the space's dimensionality: "cg.zerovec"
    (all zeros) and "cg.onevec" (all ones).

    Returns the vertically stacked Space; `core_space` itself is not
    modified.
    """
    dim = core_space.cooccurrence_matrix.shape[1]
    stacked = np.vstack([np.zeros((1, dim)), np.ones((1, dim))])
    extra = Space(DenseMatrix(stacked), ["cg.zerovec", "cg.onevec"], [])
    return Space.vstack(core_space, extra)
def fit(self, train_pairs, verbose=False):
    """Fit the additive base model, then train the weighted-additive part.

    First delegates to AdditiveModel.fit, then embeds the learned
    difference vector into the original space as an extra row named
    'pattern_vector' so the composition model can reference it like any
    other word, and finally trains the weighted-additive model on the
    extended pairs.
    """
    AdditiveModel.fit(self, train_pairs, verbose=verbose)
    if verbose:
        # Single-argument print(...) behaves identically under Python 2.
        print('fit: Fitting a weighted additive model on %d pairs' % (len(train_pairs)))
    # Embed the derived vector into the original space (by simply adding a row).
    self.new_space = Space.vstack(
        self.space, Space(self.diff_vector, ['pattern_vector'], []))
    # The class is designed for datasets with different function words
    # (== patterns); a single dummy function word stands in for all pairs.
    extended_pairs = [(base, 'pattern_vector', derived)
                      for base, derived in train_pairs]
    self.weighted_additive.train(extended_pairs, self.new_space, self.new_space)
def train_all_spaces(core_space, an_dn_space, pn_space, sv_space, vo_space): core_space = core_space.apply(RowNormalization()) print "train adj, det" a_d_space = train_one_space(core_space, an_dn_space, 0, 3) print "train prep" prep_space = train_one_space(core_space, pn_space, 1, 3) print "train vo" v_obj_space = train_one_space(core_space, vo_space, 0, 4) print "train sv" v_subj_space = train_one_space(core_space, sv_space, 1, 4) new_v_obj_rows = [row + ".objmat" for row in v_obj_space.id2row] v_obj_space._id2row = new_v_obj_rows v_obj_space._row2id = list2dict(new_v_obj_rows) new_v_subj_rows = [row + ".subjmat" for row in v_subj_space.id2row] v_subj_space._id2row = new_v_subj_rows v_subj_space._row2id = list2dict(new_v_subj_rows) all_mat_space = Space.vstack(a_d_space, prep_space) all_mat_space = Space.vstack(v_obj_space, all_mat_space) all_mat_space = Space.vstack(v_subj_space, all_mat_space) return all_mat_space
# NOTE(review): the two statements below read like the tail of a parsing
# loop (filling `recipes` from whitespace-split lines and tracking the
# longest ingredient list) whose header is outside this chunk — confirm
# their indentation against the full file.
recipes[words[0]] = words[1:]
if len(words)-1 > max_size:
    max_size = len(words)-1

# alpha = beta = 1 makes weighted addition plain vector addition.
WA = WeightedAdditive(alpha = 1, beta = 1)
last_space = None
number = count()  # supplies unique ids for intermediate composition names
# Shrink every recipe by one ingredient per round, working from the
# longest ingredient lists (max_size) down to pairs (size == 2).
for size in xrange(max_size,1,-1):
    # Recipes whose ingredient list currently has exactly `size` entries.
    relevant = (rec for rec in recipes if len(recipes[rec]) == size)
    print(size)
    composition = []
    for recipe in relevant:
        old = recipes[recipe]
        if size == 2:
            # Final pair: the composed vector takes the recipe's own name.
            name = recipe
        else:
            # Intermediate result: synthesize a unique placeholder name.
            name = "comp_" + str(next(number))
        if old[-2] in stacked_space.id2row:
            # Compose the last two ingredients, then replace them in the
            # recipe with the single composed placeholder.
            composition.append((old[-1],old[-2],name))
            recipes[recipe].pop(-1)
            recipes[recipe].pop(-1)
            recipes[recipe].append(name)
        else:
            # Second-to-last ingredient has no vector in the space: drop
            # it; the recipe is revisited at the next (smaller) size.
            recipes[recipe].pop(-2)
    if composition:
        last_space = WA.compose(composition, stacked_space)
        if size != 2:
            # Intermediate vectors must stay addressable in later rounds,
            # so stack them into the lookup space.
            stacked_space = Space.vstack(stacked_space, last_space)
# Persist the last composed space (the size-2 round when it produced
# compositions, otherwise whatever round last did).
io_utils.save(last_space, "recicomp.pkl")
# Interactively collect ingredient names until a blank line; spaces become
# underscores to match row naming, and names absent from `stacked` are
# skipped.
ingredients = []
print("Enter ingredients, enter when done")
while True:
    ingredient = raw_input("> ").replace(" ","_")
    if ingredient == "":
        break
    if ingredient not in stacked.id2row:
        print("(not found, skipping)")
        continue
    ingredients.append(ingredient)
name = ""
# Fold the ingredient list into one query vector by repeatedly composing
# the last two entries under a fresh placeholder name.
# NOTE(review): assumes at least two valid ingredients were entered —
# the double pop below raises IndexError otherwise; confirm caller input.
while True:
    (a,b) = ingredients.pop(-1),ingredients.pop(-1)
    name = "comp_" + str(next(number))
    ingredients.append(name)
    new_space = WA.compose([(a,b,name)], stacked)
    if len(ingredients) > 1:
        # More composing to do: make the new vector addressable.
        stacked = Space.vstack(stacked, new_space)
    else:
        break
# Attach the final composed query vector to the precomputed recipe space
# (presumably `recicomp` holds the pre-composed recipe vectors — verify).
stacked = Space.vstack(recicomp, new_space)
top = []
# Score every row against the composed query by cosine similarity;
# `ins` (defined elsewhere) presumably keeps `top` as a ranked shortlist
# — confirm its semantics in the full file.
for recipe in stacked.id2row:
    if recipe == name:
        continue
    sim = stacked.get_sim(recipe, name, CosSimilarity())
    ins(top, (sim,recipe))
print("Nearest neighbors:",", ".join([x[1].replace("_"," ") + " (" + str(x[0]) + ")" for x in top]))