def add_edge(self, word, other_word, word_m, other_word_m, beta, simtype):
    """Link two distinct words in the graph when they are similar enough.

    The similarity of ``word_m`` and ``other_word_m`` is obtained from
    ``evaluate.calculate_similarity(beta, word_m, other_word_m, simtype)``.
    If it reaches ``self._sim_threshold``, an edge is inserted between the
    vertices registered for the two words. The edge stores the similarity
    and a "distance" of ``1 - similarity`` (floored at zero), and both
    endpoints are passed to ``update_most_list`` with their current degree.

    Returns:
        True if an edge was added; False if the two words are equal or the
        similarity is below the threshold.
    """
    if word == other_word:
        return False

    similarity = evaluate.calculate_similarity(
        beta, word_m, other_word_m, simtype)

    # TODO: this could be done probabilistically, i.e. connect with a
    # probability based on the similarity instead of a hard threshold.
    if not (similarity >= self._sim_threshold):
        return False

    source = self._words_vertex_map[word]
    target = self._words_vertex_map[other_word]
    edge = self._graph.add_edge(source, target)

    self._graph.edge_properties["distance"][edge] = max(0, 1 - similarity)
    self._graph.edge_properties["similarity"][edge] = similarity

    # Keep the list of highest-degree nodes up to date for both endpoints.
    for endpoint in (source, target):
        self.update_most_list(endpoint, endpoint.out_degree(),
                              self._highest_degree_nodes, self._hubs_num)
    return True
def add_edge(self, word, other_word, word_m, other_word_m, beta, simtype):
    """Add an edge between two different words if they are similar enough.

    Similarity comes from ``evaluate.calculate_similarity`` applied to the
    two meanings with the given ``beta`` and ``simtype``. An edge is created
    only when that value is at least ``self._sim_threshold``; the new edge
    gets a "similarity" property and a "distance" property equal to
    ``max(0, 1 - similarity)``.

    Returns True when an edge was created and False otherwise (identical
    words are never connected).
    """
    if word == other_word:
        return False

    sim_score = evaluate.calculate_similarity(beta, word_m, other_word_m,
                                              simtype)

    # TODO: connect probabilistically, based on the similarity, rather than
    # only when the threshold is reached.
    if sim_score >= self._sim_threshold:
        endpoints = (self._words_vertex_map[word],
                     self._words_vertex_map[other_word])
        link = self._graph.add_edge(endpoints[0], endpoints[1])

        edge_props = self._graph.edge_properties
        edge_props["distance"][link] = max(0, 1 - sim_score)
        edge_props["similarity"][link] = sim_score

        # Refresh the record of the most-connected nodes.
        for node in endpoints:
            self.update_most_list(node, node.out_degree(),
                                  self._highest_degree_nodes, self._hubs_num)
        return True

    return False
def calculate_acquisition_score(self, word):
    """Compute, store and return the acquisition score of ``word``.

    The score is the similarity, as given by
    ``evaluate.calculate_similarity``, between the learned meaning of the
    word and its meaning in the gold lexicon. When forgetting is enabled
    (``self._forget_flag``), the meaning probabilities of the word are
    refreshed first. The result is cached in ``self._acquisition_scores``.
    """
    if self._forget_flag:
        # With forgetting active the meaning probabilities must be
        # recalculated before they are compared.
        self.update_meaning_prob(word)

    gold_meaning = self._gold_lexicon.meaning(word)
    learned_meaning = self._learned_lexicon.meaning(word)

    score = evaluate.calculate_similarity(
        self._beta, learned_meaning, gold_meaning, self._simtype)
    self._acquisition_scores[word] = score
    return score
def create_final_graph(self, words, lexicon, beta, simtype):
    """Create an undirected similarity graph over ``words``.

    One vertex is added per entry of ``words`` and labelled with the word.
    Every ordered pair of different words that is not yet connected is
    compared with ``evaluate.calculate_similarity``; if the similarity
    reaches ``self._sim_threshold`` an edge is added whose "distance"
    property is ``max(0, 1 - similarity)``.

    Args:
        words: re-iterable collection of hashable words.
        lexicon: object whose ``meaning(word)`` returns the word's meaning.
        beta: passed through to ``evaluate.calculate_similarity``.
        simtype: passed through to ``evaluate.calculate_similarity``.

    Returns:
        The newly built graph.
    """
    graph = Graph(directed=False)
    graph.vertex_properties["label"] = graph.new_vertex_property("string")
    graph.edge_properties["distance"] = graph.new_edge_property("double")
    # The "acqscore" property is only declared here; it is not filled in
    # by this method.
    graph.vertex_properties["acqscore"] = graph.new_vertex_property("double")

    word_vertex_map = {}
    for word in words:
        word_vertex_map[word] = graph.add_vertex()
        graph.vertex_properties["label"][word_vertex_map[word]] = word

    for word in words:
        vert = word_vertex_map[word]
        for otherword in words:
            if word == otherword:
                continue

            othervert = word_vertex_map[otherword]
            # Skip pairs that were already connected when visited in the
            # opposite order. Identity comparison is the correct way to
            # test for the "no such edge" result.
            if (graph.edge(vert, othervert) is not None
                    or graph.edge(othervert, vert) is not None):
                continue

            word_m = lexicon.meaning(word)
            otherword_m = lexicon.meaning(otherword)
            sim = evaluate.calculate_similarity(beta, word_m, otherword_m,
                                                simtype)

            if sim >= self._sim_threshold:
                new_edge = graph.add_edge(vert, othervert)
                # Edge weight is a distance: the complement of similarity.
                graph.edge_properties["distance"][new_edge] = max(0, 1 - sim)

    return graph
def create_final_graph(self, words, lexicon, beta, simtype):
    """Build a graph from a set of words and their meanings.

    Each word in ``words`` becomes a labelled vertex. Two different words
    are joined by an edge when ``evaluate.calculate_similarity`` of their
    lexicon meanings is at least ``self._sim_threshold``; the edge's
    "distance" property is set to ``max(0, 1 - similarity)``.

    Args:
        words: re-iterable collection of hashable words.
        lexicon: provides ``meaning(word)`` for every word.
        beta: forwarded to ``evaluate.calculate_similarity``.
        simtype: forwarded to ``evaluate.calculate_similarity``.

    Returns:
        The undirected graph that was built.
    """
    graph = Graph(directed=False)
    labels = graph.new_vertex_property("string")
    graph.vertex_properties["label"] = labels
    graph.edge_properties["distance"] = graph.new_edge_property("double")
    # Declared but left unpopulated by this method.
    graph.vertex_properties["acqscore"] = graph.new_vertex_property("double")

    word_vertex_map = {}
    for word in words:
        vertex = graph.add_vertex()
        word_vertex_map[word] = vertex
        graph.vertex_properties["label"][vertex] = word

    for word in words:
        vert = word_vertex_map[word]
        for otherword in words:
            if word == otherword:
                continue

            othervert = word_vertex_map[otherword]
            # A pair connected earlier (in either orientation) is not
            # re-evaluated; "no edge" is tested by identity with None.
            already_linked = (graph.edge(vert, othervert) is not None
                              or graph.edge(othervert, vert) is not None)
            if already_linked:
                continue

            word_m = lexicon.meaning(word)
            otherword_m = lexicon.meaning(otherword)
            sim = evaluate.calculate_similarity(beta, word_m, otherword_m,
                                                simtype)
            if sim >= self._sim_threshold:
                new_edge = graph.add_edge(vert, othervert)
                # Store a distance rather than the raw similarity.
                graph.edge_properties["distance"][new_edge] = max(0, 1 - sim)

    return graph
def create_final_graph(self, words, lexicon, beta, simtype, words_to_compare=None):
    """Create a similarity graph from a set of words and their meanings.

    Every word in ``words`` becomes a labelled vertex, and each unordered
    pair of them is connected when ``evaluate.calculate_similarity`` of
    their meanings is at least ``self._sim_threshold``. Edges carry a
    "distance" property equal to ``max(0, 1 - similarity)``.

    If ``words_to_compare`` is given, each of its words that is not already
    in ``words`` is additionally compared against every word in ``words``.
    Such a word is added to the graph only if it is similar enough to at
    least one of them, and it is represented by a single vertex no matter
    how many words it ends up connected to.

    Args:
        words: re-iterable collection of hashable words.
        lexicon: provides ``meaning(word)`` for every word involved.
        beta: forwarded to ``evaluate.calculate_similarity``.
        simtype: forwarded to ``evaluate.calculate_similarity``.
        words_to_compare: optional extra words to attach to the graph.

    Returns:
        The undirected graph that was built.
    """
    graph = Graph(directed=False)
    graph.vertex_properties["label"] = graph.new_vertex_property("string")
    graph.edge_properties["distance"] = graph.new_edge_property("double")
    # Declared but left unpopulated by this method.
    graph.vertex_properties["acqscore"] = graph.new_vertex_property("double")

    word_vertex_map = {}
    for word in words:
        word_vertex_map[word] = graph.add_vertex()
        graph.vertex_properties["label"][word_vertex_map[word]] = word

    for word, otherword in itertools.combinations(words, 2):
        vert = word_vertex_map[word]
        othervert = word_vertex_map[otherword]
        # Guards against parallel edges (and self-loops) should ``words``
        # contain repeated entries.
        if (graph.edge(vert, othervert) is not None
                or graph.edge(othervert, vert) is not None):
            continue

        word_m = lexicon.meaning(word)
        otherword_m = lexicon.meaning(otherword)
        sim = evaluate.calculate_similarity(beta, word_m, otherword_m, simtype)
        if sim >= self._sim_threshold:
            new_edge = graph.add_edge(vert, othervert)
            graph.edge_properties["distance"][new_edge] = max(0, 1 - sim)

    if words_to_compare:
        # Only words that are not already part of the graph are considered,
        # so an extra word can never coincide with a word from ``words``.
        extra_words = list(set(words_to_compare) - set(words))
        for extra_word in extra_words:
            for otherword in words:
                extra_m = lexicon.meaning(extra_word)
                otherword_m = lexicon.meaning(otherword)
                sim = evaluate.calculate_similarity(beta, extra_m,
                                                    otherword_m, simtype)
                if sim >= self._sim_threshold:
                    # Create the vertex lazily on the first match and reuse
                    # it afterwards; adding a new vertex per match would
                    # scatter one word over several same-labelled vertices.
                    if extra_word not in word_vertex_map:
                        word_vertex_map[extra_word] = graph.add_vertex()
                        graph.vertex_properties["label"][
                            word_vertex_map[extra_word]] = extra_word
                    extra_vert = word_vertex_map[extra_word]
                    othervert = word_vertex_map[otherword]
                    new_edge = graph.add_edge(extra_vert, othervert)
                    graph.edge_properties["distance"][new_edge] = max(
                        0, 1 - sim)

    return graph