def add_zero_idenity_matrix(matrix_space, vector_length):
    """Stack a flattened zero matrix and identity matrix onto ``matrix_space``.

    Two rows of length ``vector_length * vector_length`` are built: an
    all-zero row labelled ``"cg.zeromat"`` and the row-major flattening of
    the ``vector_length`` x ``vector_length`` identity matrix labelled
    ``"cg.identmat"``. They are wrapped in a new ``Space`` (with an empty
    column list) and combined with ``matrix_space`` via ``Space.vstack``.

    Returns whatever ``Space.vstack(matrix_space, <new space>)`` returns.
    """
    flat_size = vector_length * vector_length
    flat_zero = np.zeros((1, flat_size))
    flat_identity = np.eye(vector_length).reshape((1, flat_size))
    extra_rows = DenseMatrix(np.vstack([flat_zero, flat_identity]))
    extra_space = Space(extra_rows, ["cg.zeromat", "cg.identmat"], [])
    return Space.vstack(matrix_space, extra_space)
예제 #2
0
def vstack(s1, s2):
    """Stack two spaces, tolerating a missing (falsy) operand.

    If ``s1`` is falsy, ``s2`` is returned as-is (even if it is falsy too).
    If only ``s2`` is falsy, ``s1`` is returned. Otherwise the result of
    ``Space.vstack(s1, s2)`` is returned.
    """
    if not s1:
        return s2
    return Space.vstack(s1, s2) if s2 else s1
def add_one_zero_vector(core_space):
    """Stack an all-zeros row and an all-ones row onto ``core_space``.

    The row width is taken from the second dimension of
    ``core_space.cooccurrence_matrix``. The two rows are labelled
    ``"cg.zerovec"`` and ``"cg.onevec"``, wrapped in a new ``Space`` (with an
    empty column list) and combined with ``core_space`` via ``Space.vstack``.

    Returns whatever ``Space.vstack(core_space, <new space>)`` returns.
    """
    width = core_space.cooccurrence_matrix.shape[1]
    constant_rows = np.vstack([np.zeros((1, width)), np.ones((1, width))])
    extra_space = Space(DenseMatrix(constant_rows), ["cg.zerovec", "cg.onevec"], [])
    return Space.vstack(core_space, extra_space)
예제 #4
0
파일: Models.py 프로젝트: jsnajder/derivsem
 def fit(self, train_pairs, verbose=False):
     """Fit the weighted additive model on ``train_pairs``.

     ``train_pairs`` is iterated as ``(base, derived)`` 2-tuples. The parent
     ``AdditiveModel.fit`` runs first; ``self.diff_vector`` is then read, so
     it is presumably set by that call (TODO confirm). The method stores the
     extended space in ``self.new_space`` and trains
     ``self.weighted_additive`` on it. Nothing is returned.
     """
     AdditiveModel.fit(self, train_pairs, verbose=verbose)
     if verbose:
         print('fit: Fitting a weighted additive model on %d pairs' % len(train_pairs))

     # Embed the derived (difference) vector into the original space by
     # adding it as one extra row.
     pattern_space = Space(self.diff_vector, ['pattern_vector'], [])
     self.new_space = Space.vstack(self.space, pattern_space)

     # The trainer is fed (base, function word, derived) triples; there is
     # only one pattern here, so the extra row's label serves as a dummy
     # function word for every pair.
     triples = [(base_word, 'pattern_vector', derived_word)
                for base_word, derived_word in train_pairs]
     self.weighted_additive.train(triples, self.new_space, self.new_space)
예제 #5
0
def train_all_spaces(core_space, an_dn_space, pn_space, sv_space, vo_space):
    """Train one space per construction type and stack them into one space.

    ``RowNormalization`` is applied to ``core_space`` first. Four spaces are
    then trained with ``train_one_space`` (adjective/determiner, preposition,
    verb-object, verb-subject, in that order). The two integer arguments are
    passed through unchanged; their meaning is defined by that helper, which
    is not visible here.

    Row labels of the verb-object space get the suffix ``".objmat"`` and
    those of the verb-subject space ``".subjmat"``, by overwriting the
    spaces' ``_id2row`` / ``_row2id`` attributes in place.

    Returns the result of stacking, via ``Space.vstack``, the verb-subject,
    verb-object, adjective/determiner and preposition spaces in that order.
    """
    normalized_core = core_space.apply(RowNormalization())

    print("train adj, det")
    a_d_space = train_one_space(normalized_core, an_dn_space, 0, 3)
    print("train prep")
    prep_space = train_one_space(normalized_core, pn_space, 1, 3)
    print("train vo")
    v_obj_space = train_one_space(normalized_core, vo_space, 0, 4)
    print("train sv")
    v_subj_space = train_one_space(normalized_core, sv_space, 1, 4)

    # Suffix the verb rows so object and subject matrices stay distinguishable
    # after stacking; both lookup structures are replaced together.
    for verb_space, suffix in ((v_obj_space, ".objmat"), (v_subj_space, ".subjmat")):
        renamed_rows = [label + suffix for label in verb_space.id2row]
        verb_space._id2row = renamed_rows
        verb_space._row2id = list2dict(renamed_rows)

    # Each verb space is placed on top of what has been stacked so far.
    combined = Space.vstack(a_d_space, prep_space)
    for verb_space in (v_obj_space, v_subj_space):
        combined = Space.vstack(verb_space, combined)
    return combined
        recipes[words[0]] = words[1:]
        if len(words)-1 > max_size:
            max_size = len(words)-1

# Collapse every recipe into a single composed row by repeatedly merging its
# last two entries with a weighted additive model (alpha = beta = 1).
WA = WeightedAdditive(alpha=1, beta=1)
last_space = None  # result of the most recent compose(); stays None if nothing is composed
number = count()   # next(number) supplies the suffix of intermediate "comp_<n>" rows

# Go from the longest recipes down to pairs. Every recipe of the current
# length loses exactly one entry per pass (two entries merged into one, or an
# unknown entry dropped), so it is picked up again on the next, shorter pass.
for size in xrange(max_size, 1, -1):
    relevant = (rec for rec in recipes if len(recipes[rec]) == size)
    print(size)
    # id2row is a list, so testing membership against it for every recipe is
    # a linear scan each time. stacked_space is only rebound after the inner
    # loop, so one set snapshot per pass gives the same answers.
    known_rows = set(stacked_space.id2row)
    composition = []
    for recipe in relevant:
        old = recipes[recipe]
        # The final merge (two entries left) is stored under the recipe's own
        # name; earlier merges get a generated intermediate name.
        if size == 2:
            name = recipe
        else:
            name = "comp_" + str(next(number))
        if old[-2] in known_rows:
            composition.append((old[-1], old[-2], name))
            # Replace the two merged entries by the name of their composition.
            old[-2:] = [name]
        else:
            # Second-to-last entry has no row in the space: drop it.
            del old[-2]
    if composition:
        last_space = WA.compose(composition, stacked_space)
        # Intermediate rows must be available to later passes; the final
        # (size == 2) result is kept separate in last_space only.
        if size != 2:
            stacked_space = Space.vstack(stacked_space, last_space)

io_utils.save(last_space, "recicomp.pkl")
# Read ingredient names interactively until an empty line is entered.
# Spaces are replaced by underscores before the lookup; names that are not
# rows of `stacked` are reported and skipped.
ingredients = []
print("Enter ingredients, enter when done")
while True:
    ingredient = raw_input("> ").replace(" ", "_")
    if not ingredient:
        break
    if ingredient in stacked.id2row:
        ingredients.append(ingredient)
    else:
        print("(not found, skipping)")

# Fold the entered ingredients into one composed row: the last two entries
# are merged, and the merged row's name is pushed back until one remains.
name = ""
# Each step pops two entries, so fewer than two accepted ingredients would
# fail with an IndexError on an empty list; stop with a clear message instead.
if len(ingredients) < 2:
    raise SystemExit("Need at least two known ingredients to compose a recipe")
while True:
    (a, b) = ingredients.pop(), ingredients.pop()
    name = "comp_" + str(next(number))
    ingredients.append(name)
    new_space = WA.compose([(a, b, name)], stacked)
    if len(ingredients) > 1:
        # More merging to do: the new row must be in `stacked` so the next
        # compose() can find it by name.
        stacked = Space.vstack(stacked, new_space)
    else:
        # `name` is now the fully composed query row, held in new_space.
        break

# Put the composed query row next to the recipe rows and compare it with
# every other row by cosine similarity.
# NOTE(review): `recicomp` is not defined in the visible code; presumably it
# is the space saved as "recicomp.pkl" - confirm.
stacked = Space.vstack(recicomp, new_space)
top = []
for recipe in stacked.id2row:
    if recipe == name:
        # Skip the query row itself.
        continue
    sim = stacked.get_sim(recipe, name, CosSimilarity())
    # NOTE(review): `ins` is defined elsewhere; presumably it keeps `top` as a
    # ranked list of (similarity, row) pairs - confirm.
    ins(top, (sim, recipe))
# Build one string before printing: a two-argument print(...) call shows a
# tuple repr when `print` is the Python 2 statement, while a single string
# gives the same "Nearest neighbors: ..." line either way.
neighbors = ", ".join(x[1].replace("_", " ") + " (" + str(x[0]) + ")" for x in top)
print("Nearest neighbors: " + neighbors)