class MixedCompositionalTreeKernel(SyntacticTreeKernel):
    """Mixed Salad Kernel 2 variation.

    Tree kernel over ``SemanticTree`` instances. The overall kernel value is
    computed by the parent class; this class only overrides the per-node-pair
    ``_delta`` so that it combines the recursive product over child deltas
    with the similarity of the vectors stored on the nodes.
    """

    kernel_name = "mixed_salad_kernel22"

    def __init__(self, lambda_):
        """Store the kernel parameter and create the default similarity.

        :param lambda_: factor that seeds the product over child deltas and
            scales the product of child similarities in ``_delta``.
        """
        self._lambda = lambda_
        self._measure = CosSimilarity()  # default one

    def dot_product(self, tree1, tree2):
        """Check both arguments are ``SemanticTree`` and defer to the parent.

        :param tree1: first tree, must be a ``SemanticTree``.
        :param tree2: second tree, must be a ``SemanticTree``.
        :return: whatever the parent class' ``dot_product`` returns.
        """
        assert_type(tree1, SemanticTree)
        assert_type(tree2, SemanticTree)
        return super(MixedCompositionalTreeKernel, self).dot_product(tree1, tree2)

    # new delta
    def _delta(self, node1, node2, node2id1, node2id2, delta_matrix):
        """Compute the delta for ``(node1, node2)`` and store it in the matrix.

        Three cases are distinguished:

        * both nodes are terminals with equal label and word: delta is 1;
        * the nodes do not share the same production: delta is 0 when the
          labels differ, otherwise the similarity of the two node vectors;
        * the nodes share the same production: delta is
          ``lambda * prod(1 + child_delta)
          + sim(node1, node2) - lambda * prod(sim(child1, child2))``.

        :param node1: node of the first tree.
        :param node2: node of the second tree.
        :param node2id1: mapping from nodes of the first tree to row indices.
        :param node2id2: mapping from nodes of the second tree to column
            indices.
        :param delta_matrix: matrix of deltas, read for the child pairs and
            written for ``(node1, node2)``.
        :raises ValueError: if the stored delta of a child pair equals -1.
        """
        if (node1.is_terminal() and node2.is_terminal()
                and node1._label == node2._label
                and node1._word == node2._word):
            delta = 1
        elif not node1.has_same_production(node2):
            if node1._label != node2._label:
                delta = 0
            else:
                delta = self._measure.get_sim(node1._vector, node2._vector)
        else:
            # Fetch every aligned child pair once; both products below walk
            # over the same pairs. ``range`` (rather than ``xrange``) keeps
            # this runnable on Python 2 and Python 3 alike.
            child_pairs = [
                (node1.get_child(i), node2.get_child(i))
                for i in range(len(node1._children))
            ]

            product_children_delta = self._lambda
            for index, (child1, child2) in enumerate(child_pairs):
                child_delta = delta_matrix[node2id1[child1], node2id2[child2]]
                if child_delta == -1:
                    # NOTE(review): -1 is presumably the "not yet computed"
                    # marker the caller initialises the matrix with -- confirm.
                    raise ValueError(
                        "unexpected delta -1 for child pair %d of nodes "
                        "%r / %r: child deltas must be available before "
                        "the delta of their parents is computed"
                        % (index, node1._label, node2._label)
                    )
                product_children_delta *= (1 + child_delta)

            sim_children_product = 1
            for child1, child2 in child_pairs:
                sim_children_product *= self._measure.get_sim(
                    child1._vector, child2._vector
                )

            delta = (
                product_children_delta
                + (self._measure.get_sim(node1._vector, node2._vector)
                   - self._lambda * sim_children_product)
            )

        delta_matrix[node2id1[node1], node2id2[node2]] = delta
class NaiveCompositionalSemanticTreeKernel(SyntacticTreeKernel):
    """Mixed Salad Kernel 1.

    Tree kernel over ``SemanticTree`` instances whose per-node-pair delta is
    the similarity of the two node vectors, scaled by ``lambda`` raised to
    the sum of the two node heights. Depending on the compatibility level,
    pairs with different labels may be forced to a delta of 0.
    """

    kernel_name = "mixed_salad_kernel1"

    # Compatibility levels: with NO_COMPATIBILITY every node pair is scored,
    # with LABEL_COMPATIBILITY only pairs whose labels are equal are scored.
    NO_COMPATIBILITY = 0
    LABEL_COMPATIBILITY = 1

    def __init__(self, lambda_, compatibility_level=LABEL_COMPATIBILITY):
        """Store the parameters and create the similarity measure.

        :param lambda_: base of the height-dependent factor used in
            ``_delta``.
        :param compatibility_level: ``NO_COMPATIBILITY`` or
            ``LABEL_COMPATIBILITY`` (the default).
        """
        self._measure = CosSimilarity()
        self._compatibility_level = compatibility_level
        self._lambda = lambda_

    def dot_product(self, tree1, tree2):
        """Check both arguments are ``SemanticTree`` and defer to the parent.

        :return: whatever the parent class' ``dot_product`` returns.
        """
        assert_type(tree1, SemanticTree)
        assert_type(tree2, SemanticTree)
        parent = super(NaiveCompositionalSemanticTreeKernel, self)
        return parent.dot_product(tree1, tree2)

    def _delta(self, node1, node2, node2id1, node2id2, delta_matrix):
        """Compute the delta for ``(node1, node2)`` and store it in the matrix.

        The stored value is 0 unless the pair is accepted by the configured
        compatibility level; for any other level value it is always 0.
        """
        level = self._compatibility_level

        if level == NaiveCompositionalSemanticTreeKernel.NO_COMPATIBILITY:
            comparable = True
        elif level == NaiveCompositionalSemanticTreeKernel.LABEL_COMPATIBILITY:
            comparable = node1._label == node2._label
        else:
            comparable = False

        if comparable:
            total_height = node1.get_height() + node2.get_height()
            weight = self._lambda ** total_height
            value = weight * self._measure.get_sim(node1._vector, node2._vector)
        else:
            value = 0

        delta_matrix[node2id1[node1], node2id2[node2]] = value
class SentenceVectorKernel(Kernel):
    """Kernel that compares two semantic trees through their root vectors."""

    kernel_name = "sentence_vector_kernel"

    def __init__(self, similarity=None):
        """Select the similarity measure used by ``dot_product``.

        :param similarity: object exposing ``get_sim(vector1, vector2)``;
            when ``None`` a new ``CosSimilarity`` instance is used.
        """
        self._similarity = CosSimilarity() if similarity is None else similarity

    def dot_product(self, tree1, tree2):
        """Return the similarity between the root vectors of two trees.

        :param tree1: first tree, must be a ``SemanticTree``.
        :param tree2: second tree, must be a ``SemanticTree``.
        :return: ``0.0`` when either root vector has a norm of zero,
            otherwise the configured similarity of the two root vectors.
        """
        assert_type(tree1, SemanticTree)
        assert_type(tree2, SemanticTree)

        root_vector1 = tree1._root._vector
        root_vector2 = tree2._root._vector

        # A zero-norm vector short-circuits to 0.0 without calling the
        # similarity measure at all.
        either_is_null = root_vector1.norm() == 0.0 or root_vector2.norm() == 0.0
        if either_is_null:
            return 0.0
        return self._similarity.get_sim(root_vector1, root_vector2)
if antonyms is not None and replacement[:-2] in antonyms: continue try: replacement_vector = final_model.get_row(replacement) except Exception, ex: continue if add: context_repl_vector = base_unison + replacement_vector else: context_repl_vector = base_unison.multiply(replacement_vector) if base_unison is not None else replacement_vector results[replacement] = cos_sim.get_sim(context_word_vector, context_repl_vector) wnl = WordNetLemmatizer() ############################################################################# # This approach was to take similar words from similarity space and then # find synsets with the highest average replacement. ############################################################################# if enable_synset_avg: synsets = wn.synsets(replacement[:-2]) results_map = {} for synset in synsets: postag_list = get_wordnet_pos(replacement[-1].upper()) if synset.pos in postag_list: synset_syns = synset.lemma_names avg = 0 count = 0