Beispiel #1
0
    def add_edge(self, word, other_word, word_m, other_word_m, beta, simtype):
        """Connect two distinct words in the graph when they are similar enough.

        The similarity comes from ``evaluate.calculate_similarity`` called with
        ``beta``, the two meanings (``word_m``, ``other_word_m``) and
        ``simtype``. An edge is created only when that value reaches
        ``self._sim_threshold``; the new edge stores the similarity and a
        distance of ``max(0, 1 - similarity)``. Both endpoints are then handed,
        together with their current out-degree, to ``update_most_list`` so the
        list of highest-degree nodes is kept up to date.

        Returns True when an edge was added, and False otherwise (including
        the case where both arguments name the same word).
        """
        # A word is never linked to itself.
        if word == other_word:
            return False

        similarity = evaluate.calculate_similarity(
            beta, word_m, other_word_m, simtype)

        # Not similar enough: leave the graph untouched.
        # TODO this can be done probabilistically -- that is connect based on similarity
        if not similarity >= self._sim_threshold:
            return False

        source = self._words_vertex_map[word]
        target = self._words_vertex_map[other_word]

        edge = self._graph.add_edge(source, target)
        edge_props = self._graph.edge_properties
        edge_props["distance"][edge] = max(0, 1 - similarity)
        edge_props["similarity"][edge] = similarity

        # Refresh the highest-degree bookkeeping for both endpoints.
        for endpoint in (source, target):
            self.update_most_list(endpoint, endpoint.out_degree(),
                                  self._highest_degree_nodes, self._hubs_num)
        return True
Beispiel #2
0
    def add_edge(self, word, other_word, word_m, other_word_m, beta, simtype):
        """Add an edge between ``word`` and ``other_word`` if they are similar.

        Nothing happens (and False is returned) when the two words are equal
        or when ``evaluate.calculate_similarity(beta, word_m, other_word_m,
        simtype)`` is below ``self._sim_threshold``. Otherwise the vertices of
        both words are linked, the edge's "similarity" property is set to the
        computed value and its "distance" property to ``max(0, 1 - value)``,
        ``update_most_list`` is called for each endpoint, and True is
        returned.
        """
        if word == other_word:
            return False

        sim_score = evaluate.calculate_similarity(beta, word_m, other_word_m,
                                                  simtype)

        # TODO this can be done probabilistically -- that is connect based on similarity
        similar_enough = sim_score >= self._sim_threshold
        if not similar_enough:
            return False

        first = self._words_vertex_map[word]
        second = self._words_vertex_map[other_word]

        link = self._graph.add_edge(first, second)
        self._graph.edge_properties["distance"][link] = max(0, 1 - sim_score)
        self._graph.edge_properties["similarity"][link] = sim_score

        # Update the list of nodes with the highest degree, one endpoint at a
        # time; each degree is read just before its own update.
        first_degree = first.out_degree()
        self.update_most_list(first, first_degree,
                              self._highest_degree_nodes, self._hubs_num)
        second_degree = second.out_degree()
        self.update_most_list(second, second_degree,
                              self._highest_degree_nodes, self._hubs_num)
        return True
Beispiel #3
0
    def calculate_acquisition_score(self, word):
        """
        Compute, record and return the acquisition score of ``word``.

        When ``self._forget_flag`` is set, ``update_meaning_prob(word)`` is
        called first so the meaning probabilities are recalculated. The score
        is the similarity (``evaluate.calculate_similarity`` with
        ``self._beta`` and ``self._simtype``) between the learned meaning and
        the gold-lexicon meaning of the word; it is stored in
        ``self._acquisition_scores[word]`` before being returned.

        """
        if self._forget_flag:
            self.update_meaning_prob(word)

        gold_meaning = self._gold_lexicon.meaning(word)
        learned_meaning = self._learned_lexicon.meaning(word)

        score = self._acquisition_scores[word] = evaluate.calculate_similarity(
            self._beta, learned_meaning, gold_meaning, self._simtype)
        return score
Beispiel #4
0
    def calculate_acquisition_score(self, word):
        """
        Return how closely the learned meaning of ``word`` matches the gold one.

        If "forgetting" is active (``self._forget_flag``), the meaning
        probabilities of the word are refreshed through
        ``update_meaning_prob`` beforehand. The resulting similarity is also
        saved under ``word`` in ``self._acquisition_scores``.

        """
        if self._forget_flag:
            self.update_meaning_prob(word)

        reference = self._gold_lexicon.meaning(word)
        acquired = self._learned_lexicon.meaning(word)

        # Learned meaning first, gold meaning second.
        result = evaluate.calculate_similarity(
            self._beta,
            acquired,
            reference,
            self._simtype,
        )
        self._acquisition_scores[word] = result
        return result
Beispiel #5
0
    def create_final_graph(self, words, lexicon, beta, simtype):
        """Build an undirected similarity graph over ``words``.

        One vertex is added per entry of ``words`` and labelled with the word
        (vertex property "label"). Every pair of distinct words whose
        similarity -- ``evaluate.calculate_similarity(beta, m1, m2, simtype)``
        on the meanings returned by ``lexicon.meaning`` -- reaches
        ``self._sim_threshold`` is joined by an edge whose "distance"
        property is ``max(0, 1 - similarity)``.

        The "acqscore" vertex property is created here but is not filled in
        by this method.

        Returns the newly created graph.
        """
        graph = Graph(directed=False)
        graph.vertex_properties["label"] = graph.new_vertex_property("string")
        graph.edge_properties["distance"] = graph.new_edge_property("double")
        graph.vertex_properties["acqscore"] = graph.new_vertex_property(
            "double")

        word_vertex_map = {}

        for word in words:
            word_vertex_map[word] = graph.add_vertex()
            graph.vertex_properties["label"][word_vertex_map[word]] = word

        for word in words:
            # The vertex of the outer word does not change in the inner loop.
            vert = word_vertex_map[word]

            for otherword in words:
                if word == otherword:
                    continue

                othervert = word_vertex_map[otherword]

                # Each unordered pair is visited twice (once per ordering);
                # skip it if an edge was already added on the earlier visit.
                # Identity comparison with None avoids relying on the edge
                # descriptor's own comparison operators.
                if (graph.edge(vert, othervert) is not None
                        or graph.edge(othervert, vert) is not None):
                    continue

                word_m = lexicon.meaning(word)
                otherword_m = lexicon.meaning(otherword)

                sim = evaluate.calculate_similarity(beta, word_m, otherword_m,
                                                    simtype)

                if sim >= self._sim_threshold:
                    new_edge = graph.add_edge(vert, othervert)
                    # Clamp at 0 so the distance is never negative.
                    graph.edge_properties["distance"][new_edge] = max(
                        0, 1 - sim)  #distance #TODO

        return graph
Beispiel #6
0
    def create_final_graph(self, words, lexicon, beta, simtype):
        """Create a graph from a set of words and their meanings.

        A vertex labelled with the word (vertex property "label") is added
        for every entry of ``words``. Two distinct words are connected when
        ``evaluate.calculate_similarity(beta, meaning_a, meaning_b, simtype)``
        -- with meanings taken from ``lexicon.meaning`` -- is at least
        ``self._sim_threshold``; the edge's "distance" property is then set
        to ``max(0, 1 - similarity)``.

        The "acqscore" vertex property is declared on the graph but left
        unpopulated by this method.

        Returns the undirected graph that was built.
        """
        graph = Graph(directed=False)
        graph.vertex_properties["label"] = graph.new_vertex_property("string")
        graph.edge_properties["distance"] = graph.new_edge_property("double")
        graph.vertex_properties["acqscore"] = graph.new_vertex_property("double")

        word_vertex_map = {}

        for word in words:
            word_vertex_map[word] = graph.add_vertex()
            graph.vertex_properties["label"][word_vertex_map[word]] = word

        for word in words:
            # Loop-invariant for the inner loop, so look it up only once.
            vert = word_vertex_map[word]

            for otherword in words:
                if word == otherword:
                    continue

                othervert = word_vertex_map[otherword]

                # The nested loops see every pair in both orders; do not add a
                # second edge for a pair that is already connected. ``is not
                # None`` is the idiomatic (and comparison-operator-free) way
                # to test for a missing edge.
                already_linked = (graph.edge(vert, othervert) is not None
                                  or graph.edge(othervert, vert) is not None)
                if already_linked:
                    continue

                word_m = lexicon.meaning(word)
                otherword_m = lexicon.meaning(otherword)

                sim = evaluate.calculate_similarity(beta, word_m, otherword_m, simtype)

                if sim >= self._sim_threshold:
                    new_edge = graph.add_edge(vert, othervert)
                    # Distance is the complement of similarity, floored at 0.
                    graph.edge_properties["distance"][new_edge] = max(0, 1 - sim)  #distance #TODO

        return graph
Beispiel #7
0
    def create_final_graph(self, words, lexicon, beta, simtype,words_to_compare=None):
        """Create a similarity graph from words and their meanings.

        Phase 1: a vertex labelled with the word (vertex property "label") is
        added for every entry of ``words``. Each unordered pair of distinct
        words is connected when ``evaluate.calculate_similarity(beta, m1, m2,
        simtype)`` -- with meanings from ``lexicon.meaning`` -- is at least
        ``self._sim_threshold``. The edge's "distance" property is
        ``max(0, 1 - similarity)``.

        Phase 2 (only when ``words_to_compare`` is non-empty): every word in
        ``words_to_compare`` that is not already in ``words`` is compared
        against each word of ``words``. Such a word gets a vertex only if it
        is similar enough to at least one of them, and that single vertex is
        connected to every word it matches.

        The "acqscore" vertex property is declared on the graph but is not
        populated by this method.

        Returns the undirected graph that was built.
        """
        graph = Graph(directed=False)
        graph.vertex_properties["label"] = graph.new_vertex_property("string")
        graph.edge_properties["distance"] = graph.new_edge_property("double")
        graph.vertex_properties["acqscore"] = graph.new_vertex_property("double")

        word_vertex_map = {}

        for word in words:
            word_vertex_map[word] = graph.add_vertex()
            graph.vertex_properties["label"][word_vertex_map[word]] = word

        for word, otherword in itertools.combinations(words, 2):
            # Duplicate entries in ``words`` would otherwise pair a word with
            # itself and create a self-loop.
            if word == otherword:
                continue

            vert = word_vertex_map[word]
            othervert = word_vertex_map[otherword]

            # Guard against parallel edges (reachable when ``words`` holds
            # duplicates). Identity comparison with None avoids relying on
            # the edge descriptor's own comparison operators.
            if (graph.edge(vert, othervert) is not None
                    or graph.edge(othervert, vert) is not None):
                continue

            word_m = lexicon.meaning(word)
            otherword_m = lexicon.meaning(otherword)

            sim = evaluate.calculate_similarity(beta, word_m, otherword_m, simtype)

            if sim >= self._sim_threshold:
                new_edge = graph.add_edge(vert, othervert)
                graph.edge_properties["distance"][new_edge] = max(0, 1 - sim)  #distance #TODO

        if words_to_compare:
            # Only words that are not already vertices of the graph. Because
            # of this set difference a comparison word can never equal one of
            # ``words``, so no equality check is needed in the loop below.
            extra_words = set(words_to_compare) - set(words)

            for _word in extra_words:
                # Created lazily on the first match, so that one comparison
                # word maps to exactly one vertex however many words it is
                # similar to.
                _vert = None

                for _otherword in words:
                    _word_m = lexicon.meaning(_word)
                    _otherword_m = lexicon.meaning(_otherword)

                    _sim = evaluate.calculate_similarity(beta, _word_m, _otherword_m, simtype)

                    if _sim >= self._sim_threshold:
                        if _vert is None:
                            _vert = word_vertex_map[_word] = graph.add_vertex()
                            graph.vertex_properties["label"][_vert] = _word

                        _othervert = word_vertex_map[_otherword]

                        _new_edge = graph.add_edge(_vert, _othervert)
                        graph.edge_properties["distance"][_new_edge] = max(0, 1 - _sim)  #distance #TODO

        return graph