Esempio n. 1
0
class Aligner(object):
    """
    The Algraeph application model.

    Keeps track of the loaded parallel graph corpus, the current graph pair,
    the two graphs of that pair and the currently selected node pair. State
    changes are announced by calling send() with the originating method and a
    message name (presumably picked up by the GUI and the graph
    visualisation, given the ".gui" and ".viz" suffixes -- confirm against
    the receivers).
    """
    
    def __init__(self):
        # the domain model
        self._corpus = ParallelGraphCorpus()
        # True when there are changes which save_corpus() has not written yet
        self._changed = False
        self._filename = None
        # the current graph pair and its position in the corpus
        self._graph_pair = None
        self._graph_pair_index = None
        # the graphs of the current graph pair and the selected nodes
        self._graphs = Pair(None, None)
        self._nodes = Pair(None, None)
        # the special relation which stands for "no relation"
        self._no_relation = "none"
        # when True, selecting a node also selects the node aligned to it
        self._co_node_selection = False
        
    # ------------------------------------------------------------------------------
    # Corpus
    # ------------------------------------------------------------------------------

    def open_corpus(self, filename):
        """
        Read the parallel graph corpus in file 'filename', make it the
        current corpus and go to its first graph pair.

        Raises AlgraephException if the corpus read is empty; errors raised
        while reading are passed on to the caller. The model is left
        untouched in both cases, because the new corpus is only stored after
        it has been read and checked.
        """
        send(self.open_corpus, "statusDescription", "Loading corpus %s ..." % filename)

        # May raise errors such as IOError, not an xml file, corrupt format,
        # etc. Use of relax_gb_paths allows graphbank files to be located in
        # the same directory as the corpus file instead of the location
        # specified in the <file> element
        corpus = ParallelGraphCorpus()
        corpus.read(inf=filename, relax_gb_paths=True)
        
        if not corpus:
            raise AlgraephException("Parallel graph corpus contains no alignments")
        
        self._corpus = corpus
        self._filename = filename
        self._changed = False
            
        # a "statusDescription" without text follows the one with text above
        # (presumably resetting the status display -- confirm)
        send(self.open_corpus, "statusDescription")
        send(self.open_corpus, "newCorpus")
        send(self.open_corpus, "newCorpusName")

        self.goto_graph_pair(0)
        # implies send("newGraphPair"), and sets self._graph_pair,
        # self._graph_pair_index, self._graphs and self._nodes

        
    def save_corpus(self, filename=None):
        """
        Write the corpus to file and mark it as unchanged.

        If 'filename' is given, it becomes the corpus filename first;
        otherwise the current filename is used.
        """
        if filename:
            self._filename = filename
            send(self.save_corpus, "newCorpusName")
            
        send(self.save_corpus, "statusDescription", "Saving corpus %s ..." % self._filename)        
        
        self._corpus.write(self._filename, pprint=True)
        self._changed = False
            
        send(self.save_corpus, "statusDescription")
        
        
    def get_corpus_len(self):
        """
        returns the number of graph pairs in the corpus
        """
        return len(self._corpus)
    

    def get_corpus_filename(self):
        """
        returns the filename of the corpus, or None if no filename is set
        """
        return self._filename

    
    def get_corpus_dir(self):
        """
        returns the directory part of the corpus filename, or None if no
        filename is set
        """
        try:
            return dirname(self._filename)
        except (AttributeError, TypeError):
            # the filename is still None
            return None
    
    
    def corpus_changed(self):
        """
        returns True if the corpus has unsaved changes
        """
        return self._changed
        
    
    # ------------------------------------------------------------------------------
    # Treebanks
    # ------------------------------------------------------------------------------    
 
    def get_graphbanks_format(self):
        """
        returns the format of the source graphbank of the first graph pair
        """
        # The ParallelGraphCorpus class in principle supports graphbanks in
        # different formats, although untested for the time being. Formats are
        # therefore stored as a property of the graphbanks, but there is no
        # global format defined as a property of the corpus. So getting "the
        # graphbanks format" is not straightforward. We will make the
        # assumption that all graphbanks are in the same format, and therefore
        # it is sufficient to look at any graphbank linked to an arbitrary
        # graph pair.
        return self._corpus[0].get_source_bank().get_format()
        
    # ------------------------------------------------------------------------------
    # Graphs (GraphPair and DaesoGraph)
    # ------------------------------------------------------------------------------    
    
    def get_graph_pair(self):
        """
        returns the current graph pair, or None if there is none yet
        """
        return self._graph_pair
    
    
    def goto_prev_graph_pair(self):
        """
        Go to the graph pair before the current one, if any
        """
        self.goto_graph_pair(self._graph_pair_index - 1)

    def goto_next_graph_pair(self):
        """
        Go to the graph pair after the current one, if any
        """
        self.goto_graph_pair(self._graph_pair_index + 1)
        
        
    def goto_graph_pair(self, index):
        """
        Make the graph pair at position 'index' the current one and clear the
        node selection. An index outside the corpus is silently ignored.
        """
        # don't use try-except here, because negative index is allowed for list
        if 0 <= index < len(self._corpus):
            self._graph_pair = self._corpus[index]
            self._graph_pair_index = index
            self._graphs = self._graph_pair.get_graphs()
            self._nodes = Pair(None, None)
            
            send(self.goto_graph_pair, "newGraphPair.viz")
            send(self.goto_graph_pair, "newGraphPair.gui")
    
        
    def get_from_graph(self):
        """
        returns the source graph of the current graph pair
        """
        return self._graphs.source
    
    def get_to_graph(self):
        """
        returns the target graph of the current graph pair
        """
        return self._graphs.target
    
        
    def get_from_graph_tokens(self):
        """
        returns the token string of the source graph
        """
        return self._graphs.source.get_graph_token_string()
        
    def get_to_graph_tokens(self):
        """
        returns the token string of the target graph
        """
        return self._graphs.target.get_graph_token_string()
    
    
    def get_graph_pair_counter(self):
        """
        returns a tuple of the position of the current graph pair and the
        total number of graph pairs
        """
        # counting starts from 1
        return (self._graph_pair_index + 1, len(self._corpus))
        
    
    # ------------------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------------------
    
    def co_node_selection_mode(self, state=False):
        """
        Switch co-selection of aligned nodes on or off
        """
        self._co_node_selection = state
        
    
    def set_from_node(self, node=None):
        """
        Select 'node' as the 'from' node. In co-node selection mode, the 'to'
        node becomes the node aligned to it.
        """
        self._nodes.source = node
        
        if self._co_node_selection:
            self._nodes.target = self.get_aligned_to_node()
            
        send(self.set_from_node, "newNodeSelect.viz")
        send(self.set_from_node, "newNodeSelect.gui")

        
    def set_to_node(self, node=None):
        """
        Select 'node' as the 'to' node. In co-node selection mode, the 'from'
        node becomes the node aligned to it.
        """
        self._nodes.target = node
        
        if self._co_node_selection:
            self._nodes.source = self.get_aligned_from_node()
            
        send(self.set_to_node, "newNodeSelect.viz")
        send(self.set_to_node, "newNodeSelect.gui")
            
    
    def get_from_node(self):
        """
        returns the selected 'from' node
        """
        return self._nodes.source

    
    def get_to_node(self):
        """
        returns the selected 'to' node
        """
        return self._nodes.target
    
    
    def nodes_are_selected(self):
        """
        returns True if every member of the node pair is true
        """
        # relies on Pair being iterable over its members
        return all(self._nodes)
    
    
    def get_from_node_tokens(self):
        """
        returns the token string of the selected 'from' node, or an empty
        string if the graph gives none
        """
        return ( self._graphs.source.get_node_token_string(self._nodes.source) or
                 "" )
        
    
    def get_to_node_tokens(self):
        """
        returns the token string of the selected 'to' node, or an empty
        string if the graph gives none
        """
        return ( self._graphs.target.get_node_token_string(self._nodes.target) or
                 "" )
    
    # ------------------------------------------------------------------------------
    # Alignment
    # ------------------------------------------------------------------------------
    
    def get_relation_set(self):
        """
        returns a list with the "no relation" label followed by the relations
        of the corpus
        """
        try:
            return [self._no_relation] + self._corpus.get_relations()
        except TypeError:
            # get_relations() returned something which cannot be added to a
            # list (presumably None when no relations are defined)
            return [self._no_relation]
        
        
    def get_node_pair_relation(self):
        """
        returns the relation of the selected node pair, or the "no relation"
        label if the graph pair gives none
        """
        return self._graph_pair.get_align(self._nodes) or self._no_relation
    
        
    def set_node_pair_relation(self, relation):
        """
        Align the selected nodes with 'relation', or remove their alignment
        if 'relation' is the "no relation" label. Does nothing unless both
        nodes are selected.
        """
        if self.nodes_are_selected():
            if relation != self._no_relation:
                self._graph_pair.add_align(self._nodes, relation)
            else:
                self._graph_pair.del_align(self._nodes)
                
            self._changed = True
                
            send(self.set_node_pair_relation, "newRelation.viz")
            send(self.set_node_pair_relation, "newRelation.gui")
    

    def get_aligned_to_node(self):
        """
        Get 'to' node aligned to the selected 'from' node
        """
        return self._graph_pair.get_aligned_target_node(self._nodes.source)
    
    
    def get_aligned_from_node(self):
        """
        Get 'from' node aligned to the selected 'to' node
        """
        return self._graph_pair.get_aligned_source_node(self._nodes.target)

    
    def get_auto_fold_equal_nodes(self):
        """
        Get lists of non-terminal 'from' and 'to' nodes aligned with an 
        'equals' relation
        """
        # ignoring terminals, so the list may be of unequal size
        from_nodes = []
        to_nodes = []
        
        for (nodes, rel) in self._graph_pair.alignments_iter():
            if rel == "equals":
                if self._graphs.source.node_is_non_terminal(nodes.source):
                    from_nodes.append(nodes.source)
                    
                if self._graphs.target.node_is_non_terminal(nodes.target):
                    to_nodes.append(nodes.target)
                    
        return from_nodes, to_nodes
        
    #------------------------------------------------------------------------------
    # Comments
    #------------------------------------------------------------------------------    
        
    def get_comment(self):
        """
        returns the comment on the current graph pair, or an empty string if
        there is no graph pair, no comment element or no comment text
        """
        try:
            text = self._graph_pair.get_meta_data().find("comment").text
        except AttributeError:
            # no current graph pair, or no <comment> element
            return ""
        
        # the text of an element without content is None
        return text or ""

    
    def set_comment(self, text):
        """
        Store 'text' as the comment on the current graph pair. A text which
        is empty or consists of whitespace only removes the comment element.
        The corpus is marked as changed in either case.
        """
        meta_data_elem = self._graph_pair.get_meta_data()
        comment_elem = meta_data_elem.find("comment")
        
        if text.strip():
            if comment_elem is None:
                comment_elem = SubElement(meta_data_elem, "comment")
            comment_elem.text = text
        elif comment_elem is not None:
            # Compare with None explicitly: an element without subelements
            # tests as false, so a plain truth test would never remove the
            # comment element
            meta_data_elem.remove(comment_elem)
            
        self._changed = True
Esempio n. 2
0
class Test_ParallelGraphCorpus(unittest.TestCase):
    """
    Unit tests for the list-like interface and the helper methods of
    ParallelGraphCorpus.

    The tests read the fixture files "data/corpus-1.pgc" and
    "data/corpus-2.pgc" through a relative path. The length assertions in
    test__iadd__ imply that each fixture holds three graph pairs.
    """
    
    def setUp(self):
        # a freshly read corpus for every test, as several tests modify it
        self.pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
    
        
    def test__init(self):
        """
        init from another corpus
        """
        # only checks that construction does not raise
        ParallelGraphCorpus(self.pgc1, self.pgc1.get_relations())
        
    
    def test__add__(self):
        """
        corpus + other
        """
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        pgc3 = self.pgc1 + pgc2

        self.assertEqual(len(pgc3), len(self.pgc1) + len(pgc2))
        
        
    def test__deepcopy__(self):
        """
        copy.deepcopy(corpus)
        """
        pgc2 = copy.deepcopy(self.pgc1)
        
        self.assertTrue(isinstance(pgc2, ParallelGraphCorpus))
        self.assertFalse(self.pgc1._relations is pgc2._relations)
        self.assertFalse(self.pgc1._meta_data is pgc2._meta_data)
        
        for gp1, gp2 in zip(self.pgc1, pgc2):
            self.assertFalse(gp1 is gp2)
            # however, graphbanks and graphs are still shared
            self.assertTrue(gp1._banks is gp2._banks)
            self.assertTrue(gp1._graphs is gp2._graphs)
            
            
    def test__delitem__(self):
        """
        del corpus[0]
        """
        pg = self.pgc1[0]
        del self.pgc1[0]
        self.assertFalse(pg in self.pgc1)
        
        
    def test__delslice__(self):
        """
        del corpus[:1] and del corpus[:]
        """
        pg = self.pgc1[0]
        del self.pgc1[:1]
        self.assertFalse(pg in self.pgc1)
        
        del self.pgc1[:]
        self.assertEqual(len(self.pgc1), 0)
        
        
    def test__eq__(self):
        """
        a corpus equals itself, its shallow copy and its deep copy
        """
        self.assertEqual(self.pgc1, self.pgc1)
        
        pgc2 = self.pgc1[:]
        self.assertEqual(self.pgc1, pgc2)
        
        pgc2 = copy.deepcopy(self.pgc1)
        self.assertEqual(self.pgc1, pgc2)
        
        
    def test__getitem__(self):
        """
        corpus[0] is a graph pair
        """
        self.assertTrue(isinstance(self.pgc1[0], GraphPair))
        
    
    def test__getslice__(self):
        """
        a slice is a corpus sharing relations and meta data with the original
        """
        # or shallow copy
        # NOTE(review): with list-style slicing semantics 1:1:1 selects
        # nothing, in which case the loop below never runs -- confirm
        # whether self.pgc1[:] was intended
        pgc2 = self.pgc1[1:1:1]
        
        self.assertTrue(isinstance(pgc2, ParallelGraphCorpus))
        self.assertTrue(self.pgc1._relations is pgc2._relations)
        self.assertTrue(self.pgc1._meta_data is pgc2._meta_data)
        
        for gp1, gp2 in zip(self.pgc1, pgc2):
            self.assertTrue(gp1 is gp2)
            
            
    def test__iadd__(self):
        """
        corpus += other
        """
        # assertEqual instead of the deprecated alias assertEquals
        self.pgc1 += self.pgc1
        self.assertEqual(len(self.pgc1), 6)
        
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        pgc2 += self.pgc1
        self.assertEqual(len(pgc2), 9)
        
        
    def test__repr__(self):
        """
        repr(corpus) is not empty
        """
        self.assertTrue(repr(self.pgc1))
        
        
    def test__str__(self):
        """
        str(corpus) is not empty
        """
        self.assertTrue(str(self.pgc1))
        
        
    def test__setitem__(self):
        """
        corpus[0] = graph pair
        """
        self.pgc1[0] = self.pgc1[-1]
        self.assertEqual(self.pgc1[0], self.pgc1[-1])
        
        # NOTE(review): __setitem__ is called here with an index but without
        # a value, so the missing argument alone accounts for a TypeError;
        # presumably a value of the wrong type was meant to be passed --
        # confirm
        self.assertRaises(TypeError, 
                          ParallelGraphCorpus.__setitem__,
                          self.pgc1,
                          1)
        
        
    def test__setslice__(self):
        """
        corpus[-1:] = other[:2]
        """
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        self.pgc1[-1:] = pgc2[:2]
        self.assertEqual(len(self.pgc1), 4)
        
        # assigning something other than graph pairs is expected to fail
        self.assertRaises(TypeError,
                          ParallelGraphCorpus.__setslice__,
                          self.pgc1,
                          1,
                          1,
                          ["x"])
        
        
    def test_append(self):
        """
        corpus.append(graph pair)
        """
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        self.pgc1.append(pgc2[2])
        self.assertEqual(len(self.pgc1), 4)
        
        # NOTE(review): this repeats the check from test__setslice__ and does
        # not call append; presumably append with a wrong type was meant to
        # be tested -- confirm
        self.assertRaises(TypeError,
                          ParallelGraphCorpus.__setslice__,
                          self.pgc1,
                          1,
                          1,
                          ["x"])
        
        
    def test_clear(self):
        """
        corpus.clear() empties the corpus in place
        """
        self.pgc1.clear()
        self.assertFalse(self.pgc1)
        self.assertTrue(isinstance(self.pgc1, ParallelGraphCorpus))
        
        
    def test_extend(self):
        """
        corpus.extend(iterator over graph pairs)
        """
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        self.pgc1.extend(iter(pgc2))
        self.assertEqual(len(self.pgc1), 6)
        
        
    def test_purge(self):
        """
        corpus.purge() and its effect on the graphbanks in use
        """
        # adding graph pairs with identical graphbanks
        pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc1 += pgc1
        graphbanks_before = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_before), 2)
        pgc1.purge()
        graphbanks_after = pgc1._graphbanks()        
        self.assertEqual(graphbanks_before, graphbanks_after)
        
        # adding graph pairs with equal graphbanks
        pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc2 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc1 += pgc2
        graphbanks_before = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_before), 4)
        pgc1.purge()
        graphbanks_after = pgc1._graphbanks()        
        self.assertEqual(len(graphbanks_after), 2)
        
        # adding graph pairs with different graphbanks
        pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        pgc1 += pgc2
        graphbanks_before = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_before), 4)
        pgc1.purge()
        graphbanks_after = pgc1._graphbanks()        
        self.assertEqual(graphbanks_before, graphbanks_after)
        
        # removing graphpairs and thus dependencies on graphbanks
        del pgc1[:]
        graphbanks = pgc1._graphbanks()
        self.assertEqual(len(graphbanks), 0)
            
        
    def test__graph_banks(self):
        """
        corpus._graphbanks() gives the two graphbanks of the corpus
        """
        graphbanks = self.pgc1._graphbanks()
        self.assertEqual(len(graphbanks), 2)
        
        for gb in graphbanks:
            self.assertTrue(isinstance(gb, GraphBank)) 
            
            
    def test_annotator(self):
        """
        set_annotator appends by default and replaces with append=False
        """
        self.assertFalse(self.pgc1.get_annotator())
        self.pgc1.set_annotator("AA")
        self.assertEqual(self.pgc1.get_annotator(), "AA")
        self.pgc1.set_annotator("BB")
        self.assertEqual(self.pgc1.get_annotator(), "AA + BB")
        self.pgc1.set_annotator("CC", append=False)
        self.assertEqual(self.pgc1.get_annotator(), "CC")
Esempio n. 3
0
class Aligner(object):
    """
    The Algraeph application model.

    Wraps a parallel graph corpus and remembers which graph pair is current
    and which pair of nodes is selected in it. Every state change is followed
    by one or more send() calls naming the method and a message (the
    receivers are not visible here; the ".viz" and ".gui" suffixes suggest
    the visualisation and the GUI -- confirm).
    """
    def __init__(self):
        # the domain model
        self._corpus = ParallelGraphCorpus()
        # set by edits, cleared again by open_corpus() and save_corpus()
        self._changed = False
        self._filename = None
        # current graph pair and its index in the corpus
        self._graph_pair = None
        self._graph_pair_index = None
        # graphs of the current graph pair, and the selected node pair
        self._graphs = Pair(None, None)
        self._nodes = Pair(None, None)
        # the special relation which stands for "no relation"
        self._no_relation = "none"
        # if True, selecting a node selects its aligned node as well
        self._co_node_selection = False

    # ------------------------------------------------------------------------------
    # Corpus
    # ------------------------------------------------------------------------------

    def open_corpus(self, filename):
        """
        Load the parallel graph corpus stored in 'filename' and show its
        first graph pair.

        Raises AlgraephException if the corpus turns out to be empty. Errors
        from reading the file are not caught. In both cases the previously
        loaded corpus stays in place, as the new one is stored only after
        reading and checking it.
        """
        send(self.open_corpus, "statusDescription",
             "Loading corpus %s ..." % filename)

        # May raise errors such as IOError, not an xml file, corrupt format,
        # etc. Use of relax_gb_paths allows graphbank files to be located in
        # the same directory as the corpus file instead of the location
        # specified in the <file> element
        corpus = ParallelGraphCorpus()
        corpus.read(inf=filename, relax_gb_paths=True)

        if not corpus:
            raise AlgraephException(
                "Parallel graph corpus contains no alignments")

        self._corpus = corpus
        self._filename = filename
        self._changed = False

        # "statusDescription" is sent again without a text (presumably to
        # clear the status display -- confirm)
        send(self.open_corpus, "statusDescription")
        send(self.open_corpus, "newCorpus")
        send(self.open_corpus, "newCorpusName")

        self.goto_graph_pair(0)
        # implies send("newGraphPair"), and sets self._graph_pair,
        # self._graph_pair_index, self._graphs and self._nodes

    def save_corpus(self, filename=None):
        """
        Save the corpus and clear the "changed" flag.

        A given 'filename' replaces the current corpus filename before
        writing; without it the corpus is written to the current filename.
        """
        if filename:
            self._filename = filename
            send(self.save_corpus, "newCorpusName")

        send(self.save_corpus, "statusDescription",
             "Saving corpus %s ..." % self._filename)

        self._corpus.write(self._filename, pprint=True)
        self._changed = False

        send(self.save_corpus, "statusDescription")

    def get_corpus_len(self):
        """
        Get the number of graph pairs in the corpus
        """
        return len(self._corpus)

    def get_corpus_filename(self):
        """
        Get the corpus filename, which is None as long as none is set
        """
        return self._filename

    def get_corpus_dir(self):
        """
        Get the directory of the corpus file, or None if no filename is set
        """
        try:
            return dirname(self._filename)
        except (AttributeError, TypeError):
            # raised for a filename which is still None
            return None

    def corpus_changed(self):
        """
        returns True if the corpus has unsaved changes
        """
        return self._changed

    # ------------------------------------------------------------------------------
    # Treebanks
    # ------------------------------------------------------------------------------

    def get_graphbanks_format(self):
        """
        Get the format of the source graphbank of the first graph pair
        """
        # The ParallelGraphCorpus class in principle supports graphbanks in
        # different formats, although untested for the time being. Formats are
        # therefore stored as a property of the graphbanks, but there is no
        # global format defined as a property of the corpus. So getting "the
        # graphbanks format" is not straightforward. We will make the
        # assumption that all graphbanks are in the same format, and therefore
        # it is sufficient to look at any graphbank linked to an arbitrary
        # graph pair.
        return self._corpus[0].get_source_bank().get_format()

    # ------------------------------------------------------------------------------
    # Graphs (GraphPair and DaesoGraph)
    # ------------------------------------------------------------------------------

    def get_graph_pair(self):
        """
        Get the current graph pair (None before any graph pair is shown)
        """
        return self._graph_pair

    def goto_prev_graph_pair(self):
        """
        Step back to the previous graph pair, if there is one
        """
        self.goto_graph_pair(self._graph_pair_index - 1)

    def goto_next_graph_pair(self):
        """
        Step forward to the next graph pair, if there is one
        """
        self.goto_graph_pair(self._graph_pair_index + 1)

    def goto_graph_pair(self, index):
        """
        Switch to the graph pair at 'index' and reset the node selection.
        Nothing happens for an index outside the range of the corpus.
        """
        # don't use try-except here, because negative index is allowed for list
        if 0 <= index < len(self._corpus):
            self._graph_pair = self._corpus[index]
            self._graph_pair_index = index
            self._graphs = self._graph_pair.get_graphs()
            self._nodes = Pair(None, None)

            send(self.goto_graph_pair, "newGraphPair.viz")
            send(self.goto_graph_pair, "newGraphPair.gui")

    def get_from_graph(self):
        """
        Get the source graph of the current graph pair
        """
        return self._graphs.source

    def get_to_graph(self):
        """
        Get the target graph of the current graph pair
        """
        return self._graphs.target

    def get_from_graph_tokens(self):
        """
        Get the token string of the source graph
        """
        return self._graphs.source.get_graph_token_string()

    def get_to_graph_tokens(self):
        """
        Get the token string of the target graph
        """
        return self._graphs.target.get_graph_token_string()

    def get_graph_pair_counter(self):
        """
        Get a tuple of the position of the current graph pair and the number
        of graph pairs in the corpus
        """
        # counting starts from 1
        return (self._graph_pair_index + 1, len(self._corpus))

    # ------------------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------------------

    def co_node_selection_mode(self, state=False):
        """
        Turn co-selection of aligned nodes on or off
        """
        self._co_node_selection = state

    def set_from_node(self, node=None):
        """
        Make 'node' the selected 'from' node; with co-node selection on, the
        'to' node is set to the node aligned to it
        """
        self._nodes.source = node

        if self._co_node_selection:
            self._nodes.target = self.get_aligned_to_node()

        send(self.set_from_node, "newNodeSelect.viz")
        send(self.set_from_node, "newNodeSelect.gui")

    def set_to_node(self, node=None):
        """
        Make 'node' the selected 'to' node; with co-node selection on, the
        'from' node is set to the node aligned to it
        """
        self._nodes.target = node

        if self._co_node_selection:
            self._nodes.source = self.get_aligned_from_node()

        send(self.set_to_node, "newNodeSelect.viz")
        send(self.set_to_node, "newNodeSelect.gui")

    def get_from_node(self):
        """
        Get the selected 'from' node
        """
        return self._nodes.source

    def get_to_node(self):
        """
        Get the selected 'to' node
        """
        return self._nodes.target

    def nodes_are_selected(self):
        """
        returns True if all members of the node pair are true
        """
        # depends on Pair being iterable over its members
        return all(self._nodes)

    def get_from_node_tokens(self):
        """
        Get the token string of the selected 'from' node, or an empty string
        if the graph provides none
        """
        return (self._graphs.source.get_node_token_string(self._nodes.source)
                or "")

    def get_to_node_tokens(self):
        """
        Get the token string of the selected 'to' node, or an empty string
        if the graph provides none
        """
        return (self._graphs.target.get_node_token_string(self._nodes.target)
                or "")

    # ------------------------------------------------------------------------------
    # Alignment
    # ------------------------------------------------------------------------------

    def get_relation_set(self):
        """
        Get a list of the "no relation" label plus the corpus relations
        """
        try:
            return [self._no_relation] + self._corpus.get_relations()
        except TypeError:
            # what get_relations() returned cannot be added to a list
            # (presumably None if no relations are defined)
            return [self._no_relation]

    def get_node_pair_relation(self):
        """
        Get the relation between the selected nodes, or the "no relation"
        label if the graph pair provides none
        """
        return self._graph_pair.get_align(self._nodes) or self._no_relation

    def set_node_pair_relation(self, relation):
        """
        Set the relation between the selected nodes; the "no relation" label
        deletes their alignment. Ignored unless both nodes are selected.
        """
        if self.nodes_are_selected():
            if relation != self._no_relation:
                self._graph_pair.add_align(self._nodes, relation)
            else:
                self._graph_pair.del_align(self._nodes)

            self._changed = True

            send(self.set_node_pair_relation, "newRelation.viz")
            send(self.set_node_pair_relation, "newRelation.gui")

    def get_aligned_to_node(self):
        """
        Get 'to' node aligned to the selected 'from' node
        """
        return self._graph_pair.get_aligned_target_node(self._nodes.source)

    def get_aligned_from_node(self):
        """
        Get 'from' node aligned to the selected 'to' node
        """
        return self._graph_pair.get_aligned_source_node(self._nodes.target)

    def get_auto_fold_equal_nodes(self):
        """
        Get lists of non-terminal 'from' and 'to' nodes aligned with an 
        'equals' relation
        """
        # ignoring terminals, so the list may be of unequal size
        from_nodes = []
        to_nodes = []

        for (nodes, rel) in self._graph_pair.alignments_iter():
            if rel == "equals":
                if self._graphs.source.node_is_non_terminal(nodes.source):
                    from_nodes.append(nodes.source)

                if self._graphs.target.node_is_non_terminal(nodes.target):
                    to_nodes.append(nodes.target)

        return from_nodes, to_nodes

    #------------------------------------------------------------------------------
    # Comments
    #------------------------------------------------------------------------------

    def get_comment(self):
        """
        Get the comment on the current graph pair. The result is an empty
        string if there is no graph pair, no comment element or no text in
        the comment element.
        """
        try:
            text = self._graph_pair.get_meta_data().find("comment").text
        except AttributeError:
            # either the graph pair or the <comment> element is missing
            return ""

        # an element without content has None as its text
        return text or ""

    def set_comment(self, text):
        """
        Set the comment on the current graph pair to 'text'. If 'text' is
        empty or whitespace only, an existing comment element is removed.
        Always marks the corpus as changed.
        """
        meta_data_elem = self._graph_pair.get_meta_data()
        comment_elem = meta_data_elem.find("comment")

        if text.strip():
            if comment_elem is None:
                comment_elem = SubElement(meta_data_elem, "comment")
            comment_elem.text = text
        elif comment_elem is not None:
            # The test against None is required: an element without
            # subelements is false in a boolean context, so "elif
            # comment_elem:" would leave the comment element in place
            meta_data_elem.remove(comment_elem)

        self._changed = True