コード例 #1
0
ファイル: Models.py プロジェクト: jsnajder/derivsem
class WeightedAdditiveModel(AdditiveModel):

    weighted_additive = None
    new_space = None

    def __init__(self, space, alpha=None, beta=None, no_diff=False):
        AdditiveModel.__init__(self, space, no_diff=no_diff)
        self.weighted_additive = WeightedAdditive(alpha=alpha, beta=beta)

    def fit(self, train_pairs, verbose=False):
        AdditiveModel.fit(self, train_pairs, verbose=verbose)
        if verbose:
            print 'fit: Fitting a weighted additive model on %d pairs' % (len(train_pairs))
        # First, we embed the derived vector into the original space (by simply adding a row)
        vec_space = Space(self.diff_vector, ['pattern_vector'], [])
        self.new_space = Space.vstack(self.space, vec_space)
        #  class is designed to be run on a dataset with different function words (==patterns).
        # We use a dummy function word here.
        train_pairs_ext = [(base, 'pattern_vector', derived) for (base, derived) in train_pairs]
        self.weighted_additive.train(train_pairs_ext, self.new_space, self.new_space)

    def predict(self, base, verbose=False):
        if self.weighted_additive is None:
            raise NameError('Error: Model has not yet been trained')
        composed_space = self.weighted_additive.compose([(base, 'pattern_vector', 'derived')], self.new_space)
        return composed_space.get_row('derived')
コード例 #2
0
    def test_space_train(self):
        """Train WeightedAdditive on several datasets and verify both the
        estimated alpha/beta weights and the composed phrase space."""
        # Each case: (train data, argument space, expected phrase space,
        #             expected alpha, expected beta).
        test_cases = [
            ([("a", "b", "a_b"), ("a", "a", "a_a")], self.space1,
             Space(DenseMatrix(np.mat([[12, 3], [6, 2]])),
                   ["a_b", "a_a"], ["f1", "f2"]),
             1, 1),
            ([("a", "b", "a_b"), ("a", "a", "a_a")], self.space1,
             Space(DenseMatrix(np.mat([[0, 0], [0, 0]])),
                   ["a_b", "a_a"], ["f1", "f2"]),
             0, 0),
            ([("a", "b", "a_b"), ("a", "a", "a_a")], self.space1,
             Space(DenseMatrix(np.mat([[0, 0], [0, 0]])),
                   ["a_b", "a_a"], []),
             0, 0),
            ([("a", "b", "a_b")], self.space1,
             Space(DenseMatrix(np.mat([[21, 5]])),
                   ["a_b"], []),
             1, 2),
            ([("a", "b", "a_b"), ("bla", "b", "a_b"), ("a", "bla", "a_b")], self.space1,
             Space(DenseMatrix(np.mat([[21, 5]])),
                   ["a_b"], []),
             1, 2),
        ]

        for train_data, arg_space, expected_space, exp_alpha, exp_beta in test_cases:
            model = WeightedAdditive()
            model.train(train_data, arg_space, expected_space)

            # Estimated weights must match the analytic solution.
            self.assertAlmostEqual(model.alpha, exp_alpha, 7)
            self.assertAlmostEqual(model.beta, exp_beta, 7)

            composed = model.compose(train_data, arg_space)

            # Row/column bookkeeping must mirror the expected phrase space.
            self.assertListEqual(composed.id2row, expected_space.id2row)
            self.assertListEqual(composed.id2column, expected_space.id2column)
            self.assertDictEqual(composed.row2id, expected_space.row2id)
            self.assertDictEqual(composed.column2id, expected_space.column2id)

            np.testing.assert_array_almost_equal(composed.cooccurrence_matrix.mat,
                                                 expected_space.cooccurrence_matrix.mat,
                                                 8)
コード例 #3
0
ファイル: ex10.py プロジェクト: totonac/dissect
#ex10.py
#-------
# Example script: compose word vectors with a plain additive model
# (alpha = beta = 1) and save the resulting phrase space.
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

#load a space
my_space = io_utils.load("./data/out/ex10.pkl")

# Inspect the loaded space (Python 2 print statements).
print my_space.id2row
print my_space.cooccurrence_matrix

# instantiate a weighted additive model
my_comp = WeightedAdditive(alpha = 1, beta = 1)

# use the model to compose words in my_space
# Each triple is (word1, word2, name_of_composed_row).
composed_space = my_comp.compose([("good", "book", "good_book"),
                                  ("good", "car", "good_car")],
                                 my_space)

print composed_space.id2row
print composed_space.cooccurrence_matrix

#save the composed space
io_utils.save(composed_space, "data/out/PHRASE_SS.ex10.pkl")


コード例 #4
0
#compute multiplication/addition of a list of word pairs
# Word pairs are read from the file given as the second CLI argument
# (columns 0 and 1 of each line).
fname = sys.argv[2]
word_pairs = io_utils.read_tuple_list(fname, fields=[0,1])

# NOTE(review): 'lengths' is never filled in this excerpt; presumably used
# further down the file -- confirm before removing.
lengths=[]
found=True
for wp in word_pairs:
	try:
		# Only fetched to check that both words exist in the space;
		# the vectors themselves are not used below.
		v1=my_space.get_row(wp[0])
		v2=my_space.get_row(wp[1])
	except KeyError:
		#print wp[0],"or",wp[1],"not found"
		found=False
	if found:
		# Compose the pair additively and inspect its neighbourhood.
		composed_space = add.compose([(wp[0], wp[1], "_composed_")], my_space)
		neighbours=composed_space.get_neighbours("_composed_", 10, CosSimilarity(),space2=my_space)
		print wp[0],wp[1]
		print neighbours
		# Density = mean cosine similarity over the 10 nearest neighbours.
		density=0
		for n in neighbours:
			density+=n[1]
		density=density/10
		print "Density",density
		c=composed_space.get_row("_composed_")
		print "Norm ",c.norm()
		cos=composed_space.get_sim("_composed_",wp[1], CosSimilarity(), space2=my_space)
		print "Cos ",cos
		print "--"
	else:
		# Pair was skipped (a word was missing); reset the flag so the
		# next iteration starts clean.
		found=True
コード例 #5
0
        recipes[words[0]] = words[1:]
        if len(words)-1 > max_size:
            max_size = len(words)-1

# Unweighted additive model: a composed vector is the sum of its two inputs.
WA = WeightedAdditive(alpha = 1, beta = 1)
last_space = None
number = count()  # running counter for unique intermediate row names
# Process recipes from the largest ingredient list down to pairs,
# composing the last two ingredients of each recipe per round.
for size in xrange(max_size,1,-1):
    relevant = (rec for rec in recipes if len(recipes[rec]) == size)
    print(size)
    composition = []
    for recipe in relevant:
        old = recipes[recipe]
        if size == 2:
            # Final pair: the composed row gets the recipe's own name.
            name = recipe
        else:
            # Intermediate result: use a fresh synthetic name.
            name = "comp_" + str(next(number))
        if old[-2] in stacked_space.id2row:
            # Compose the last two ingredients, then replace them in the
            # recipe with the name of the composed vector.
            composition.append((old[-1],old[-2],name))
            recipes[recipe].pop(-1)
            recipes[recipe].pop(-1)
            recipes[recipe].append(name)
        else:
            # Second-to-last ingredient not in the space: drop it and
            # retry with the remaining ingredients in a later round.
            recipes[recipe].pop(-2)
    if composition:
        last_space = WA.compose(composition, stacked_space)
        if size != 2:
            # Stack intermediate composed vectors so they can serve as
            # inputs in the next round.
            stacked_space = Space.vstack(stacked_space, last_space)

io_utils.save(last_space, "recicomp.pkl")
コード例 #6
0
ファイル: ex10.py プロジェクト: georgiana-dinu/dissect
#ex10.py
#-------
# Demo: additive composition (alpha = beta = 1) of word pairs, with the
# composed phrase space pickled at the end.
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

#load a space
my_space = io_utils.load("./data/out/ex10.pkl")

# Show rows and co-occurrence counts of the loaded space.
print my_space.id2row
print my_space.cooccurrence_matrix

# instantiate a weighted additive model
my_comp = WeightedAdditive(alpha = 1, beta = 1)

# use the model to compose words in my_space
# Triples are (word1, word2, name_for_composed_row).
composed_space = my_comp.compose([("good", "book", "good_book"),
                                  ("good", "car", "good_car")], 
                                 my_space)

print composed_space.id2row
print composed_space.cooccurrence_matrix

#save the composed space
io_utils.save(composed_space, "data/out/PHRASE_SS.ex10.pkl")


コード例 #7
0
ファイル: analogy.py プロジェクト: deveshbatra/Tutorials
##########################################################################
# Solve a word analogy (base - minus + plus) with weighted additive models.

from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive
from composes.similarity.cos import CosSimilarity
import sys



# CLI arguments: pickled space, then the three analogy words.
pkl=sys.argv[1]
base=sys.argv[2]
minus=sys.argv[3]
plus=sys.argv[4]

space = io_utils.load(pkl)

# instantiate an additive and subtractive model
add = WeightedAdditive(alpha = 1, beta = 1)
# beta = -1 turns the additive model into vector subtraction.
sub = WeightedAdditive(alpha = 1, beta = -1)


#print space.get_neighbours(base, 10, CosSimilarity())

print "Subtracting",minus,"from",base
composed_space = sub.compose([(base, minus, "step1")], space)
#print composed_space.get_neighbours("step1", 10, CosSimilarity(),space)

print "Adding",plus,"..."
# NOTE(review): the (composed_space, space) tuple presumably makes compose
# resolve each argument in the corresponding space -- "step1" only exists
# in composed_space, while plus lives in the original space.  Confirm
# against the composes API.
composed_space2 = add.compose([("step1", plus, "step2")], (composed_space,space))
print composed_space2.get_neighbours("step2", 10, CosSimilarity(),space)
コード例 #8
0
ファイル: dissect.py プロジェクト: DariaRyzhova/phd
        els_for_comp.append(element)
    return els_for_comp


# Build the typicality space and the distributional space from dense-matrix
# and row files, then evaluate composed phrases against the gold space.
typ_space = create_space(TypDmFile, TypRowsFile)
distr_space = create_space(DistrDmFile, DistrRowsFile)

#load a space from a pickle file
#my_space = io_utils.load("./sharp/lexfunc/lexfunc_Ridge_pract.pkl")

#distributional vectors processing
# PPMI weighting followed by SVD reduction to 300 dimensions.
distr_space = distr_space.apply(PpmiWeighting())
distr_space = distr_space.apply(Svd(300))
#io_utils.save(distr_space, "./spaces/smooth_phrases_ppmi.pkl")

items = items_from_file(itemsFile)
els_for_comp = elements_for_composition(items)

# Plain additive composition (alpha = beta = 1).
my_comp = WeightedAdditive(alpha=1, beta=1)
distr_space = my_comp.compose(els_for_comp, distr_space)

# NOTE(review): this rebinds the name 'pairs' from the helper function to
# its own result, shadowing the function for the rest of the script --
# rename the variable if pairs() is ever needed again below.
pairs = pairs(items)

# Compare predicted (distributional) and gold (typicality) similarities.
predicted = distr_space.get_sims(pairs, CosSimilarity())
gold = typ_space.get_sims(pairs, CosSimilarity())

#compute correlations
print "Spearman"
print scoring_utils.score(gold, predicted, "spearman")
print "Pearson"
print scoring_utils.score(gold, predicted, "pearson")
コード例 #9
0
#compute multiplication/addition of a list of word pairs
# Pairs come from the file named by the second CLI argument
# (columns 0 and 1 of each line).
fname = sys.argv[2]
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])

# NOTE(review): 'lengths' is not appended to in this excerpt; verify its
# use further down the file before removing.
lengths = []
found = True
for wp in word_pairs:
    try:
        v1 = my_space.get_row(wp[0])
        v2 = my_space.get_row(wp[1])
    except KeyError:
        #print wp[0],"or",wp[1],"not found"
        found = False
    if found:
        composed_space = add.compose([(wp[0], wp[1], "_composed_")], my_space)
        neighbours = composed_space.get_neighbours("_composed_",
                                                   10,
                                                   CosSimilarity(),
                                                   space2=my_space)
        print wp[0], wp[1]
        print neighbours
        density = 0
        for n in neighbours:
            density += n[1]
        density = density / 10
        print "Density", density
        c = composed_space.get_row("_composed_")
        print "Norm ", c.norm()
        cos = composed_space.get_sim("_composed_",
                                     wp[1],
コード例 #10
0
ファイル: word2vec_example.py プロジェクト: Aliases/dissect
#                                   format = "dm",
#                                   data ="SOME_PATH_FOR_A_WORD_TO_VEC_PERIPHERAL_SPACE_DATA"
#                                   )
		

# Debug
# print space.cooccurrence_matrix
# print space.id2row


# instantiate a weighted additive model
my_comp = WeightedAdditive(alpha = 1, beta = 1)

# use the model to compose words in my_space
# Each triple is (word1, word2, name_for_composed_row).
composed_space = my_comp.compose([("good", "book", "good_book"),
                                  ("good", "car", "good_car")], 
                                 space)

# Inspect the composed space and the similarity of the two new phrases.
print composed_space.id2row
print composed_space.cooccurrence_matrix
print composed_space.get_sims([("good_car","good_book")], CosSimilarity()) # Similarity metric

#===============================================================================================================
print "="*80
#===============================================================================================================
##Training Models

##Training Weight Additive Models

#training data
train_data = [("good", "car", "good_car"),
コード例 #11
0
# Interactively read ingredient names, compose them pairwise into a single
# recipe vector, then list its nearest recipe neighbours.
ingredients = []
print("Enter ingredients, enter when done")
while True:
    # Spaces are replaced with underscores to match row-name conventions.
    ingredient = raw_input("> ").replace(" ","_")
    if ingredient == "":
        break
    if ingredient not in stacked.id2row:
        print("(not found, skipping)")
        continue
    ingredients.append(ingredient)

# Repeatedly compose the last two entries into a synthetic vector until
# only the final composed vector remains.
# NOTE(review): pops two items unconditionally -- raises IndexError when
# fewer than two ingredients were entered; confirm that is acceptable.
name = ""
while True:
    (a,b) = ingredients.pop(-1),ingredients.pop(-1)
    name = "comp_" + str(next(number))
    ingredients.append(name)
    new_space = WA.compose([(a,b,name)], stacked)
    if len(ingredients) > 1:
        # Stack the intermediate vector so it can be composed further.
        stacked = Space.vstack(stacked, new_space)
    else:
        break

# NOTE(review): 'recicomp' (presumably the precomputed recipe space) is
# stacked with the final composed vector for the similarity search below.
stacked = Space.vstack(recicomp, new_space)
top = []
for recipe in stacked.id2row:
    if recipe == name:
        # Skip the query vector itself.
        continue
    sim = stacked.get_sim(recipe, name, CosSimilarity())
    ins(top, (sim,recipe))
print("Nearest neighbors:",", ".join([x[1].replace("_"," ") + " (" + str(x[0]) + ")" for x in top]))