Esempio n. 1
0
 def test_extract_val_binary(self):
     """
     Run extract() with ``develop`` off and ``binary`` on, then check the
     files listed in the ``val_*`` attributes of the setting.

     Checks that there is one true-corpus file and one instances file per
     part file, and that the first corpus file and the first (binary)
     instances file have the same length.
     """
     st = create_setting()
     st.develop = False
     # create tmp dirs for extraction output
     st.inst_dir = tempfile.mkdtemp()
     st.true_dir = tempfile.mkdtemp()
     st.binary = True

     extract(st)

     # Everything after extraction runs under try/finally so that a
     # failing check does not leave the extraction output behind.
     try:
         # check no of files
         self.assertEqual(len(st.val_true_fns),
                          len(st.val_part_fns))
         self.assertEqual(len(st.val_inst_fns),
                          len(st.val_part_fns))

         # test loading a corpus file
         corpus = ParallelGraphCorpus(inf=st.val_true_fns[0])

         # test loading an instances file
         inst = CorpusInst()
         inst.loadbin(st.val_inst_fns[0])
         self.assertEqual(len(corpus), len(inst))
     finally:
         clean_inst(st)
         clean_true(st)
Esempio n. 2
0
 def test_extract_dev(self):
     """
     Run extract() with ``validate`` off, then check the files listed in
     the ``dev_*`` attributes of the setting.

     Checks that there is one true-corpus file and one instances file per
     part file, and that the first corpus file and the first (text)
     instances file have the same length.
     """
     st = create_setting()
     st.validate = False
     # create tmp dirs for extraction output
     st.inst_dir = tempfile.mkdtemp()
     st.true_dir = tempfile.mkdtemp()

     extract(st)

     # Everything after extraction runs under try/finally so that a
     # failing check does not leave the extraction output behind.
     try:
         # check no of files
         self.assertEqual(len(st.dev_true_fns),
                          len(st.dev_part_fns))
         self.assertEqual(len(st.dev_inst_fns),
                          len(st.dev_part_fns))

         # test loading a corpus file
         corpus = ParallelGraphCorpus(inf=st.dev_true_fns[0])

         # test loading an instances file
         inst = CorpusInst()
         inst.loadtxt(st.dev_inst_fns[0],
                      st.descriptor.dtype)
         self.assertEqual(len(corpus), len(inst))
     finally:
         clean_inst(st)
         clean_true(st)
Esempio n. 3
0
    def test_extract_with_pp_graph_hooks(self):
        """
        Test extracting a feature that depends on a graph preprocessing hook.

        A single feature is configured whose feature function reads a node
        attribute "x" that only exists because the preprocessing hook set
        it. After extraction the usual file-count and length checks are
        made, and the "x" field of the first instance is checked to hold
        "y" everywhere.
        """
        st = create_setting()
        st.validate = False
        # create tmp dirs for extraction output
        st.inst_dir = tempfile.mkdtemp()
        st.true_dir = tempfile.mkdtemp()

        # a preprocessing function which inserts an attribute "x" with value
        # "y" on every node in the graphs
        def pp_hook1(graphs):
            for g in graphs:
                for attrs in g.node.values():
                    attrs[u"x"] = u"y"

        # a feature function which relies on the pp_hook above
        def ff_x(nodes, graphs, **kwargs):
            return graphs.source.node[nodes.source][u"x"]

        # create a feature description
        f = Feat(ff_x, "S1", pp_graph_hooks=[pp_hook1])

        # add to features; descriptor and extractor are automatically derived
        st.features = (f,)

        extract(st)

        # Everything after extraction runs under try/finally so that a
        # failing check does not leave the extraction output behind.
        try:
            # check no of files
            self.assertEqual(len(st.dev_true_fns),
                             len(st.dev_part_fns))
            self.assertEqual(len(st.dev_inst_fns),
                             len(st.dev_part_fns))

            # test loading a corpus file
            corpus = ParallelGraphCorpus(inf=st.dev_true_fns[0])

            # test loading an instances file
            inst = CorpusInst()
            inst.loadtxt(st.dev_inst_fns[0],
                         st.descriptor.dtype)
            self.assertEqual(len(corpus), len(inst))

            # check values produced by preprocessing function
            self.assertTrue(all(inst[0]["x"] == "y"))
        finally:
            clean_inst(st)
            clean_true(st)
Esempio n. 4
0
    def test_extract_with_pp_graph_hooks(self):
        """
        Test extracting a feature that depends on a graph preprocessing hook.

        The hook writes attribute "x" = "y" on every graph node; the single
        configured feature function reads that attribute back. The test
        then checks file counts, corpus/instances length agreement, and
        that the "x" field of the first instance equals "y" throughout.
        """
        st = create_setting()
        st.validate = False
        # create tmp dirs for extraction output
        st.inst_dir = tempfile.mkdtemp()
        st.true_dir = tempfile.mkdtemp()

        # a preprocessing function which inserts an attribute "x" with value
        # "y" on every node in the graphs
        def pp_hook1(graphs):
            for g in graphs:
                for attrs in g.node.values():
                    attrs[u"x"] = u"y"

        # a feature function which relies on the pp_hook above
        def ff_x(nodes, graphs, **kwargs):
            return graphs.source.node[nodes.source][u"x"]

        # create a feature description
        f = Feat(ff_x, "S1", pp_graph_hooks=[pp_hook1])

        # add to features; descriptor and extractor are automatically derived
        st.features = (f, )

        extract(st)

        # Cleanup lives in a finally clause: without it, any failing
        # assertion or load below would skip clean_inst/clean_true.
        try:
            # check no of files
            self.assertEqual(len(st.dev_true_fns), len(st.dev_part_fns))
            self.assertEqual(len(st.dev_inst_fns), len(st.dev_part_fns))

            # test loading a corpus file
            corpus = ParallelGraphCorpus(inf=st.dev_true_fns[0])

            # test loading an instances file
            inst = CorpusInst()
            inst.loadtxt(st.dev_inst_fns[0], st.descriptor.dtype)
            self.assertEqual(len(corpus), len(inst))

            # check values produced by preprocessing function
            self.assertTrue(all(inst[0]["x"] == "y"))
        finally:
            clean_inst(st)
            clean_true(st)
Esempio n. 5
0
    def test_extract_dev(self):
        """
        Run extract() with ``validate`` off and check the ``dev_*`` outputs.

        Verifies that the numbers of true-corpus files and instances files
        both equal the number of part files, and that the first corpus file
        and the first text instances file load with equal lengths.
        """
        st = create_setting()
        st.validate = False
        # create tmp dirs for extraction output
        st.inst_dir = tempfile.mkdtemp()
        st.true_dir = tempfile.mkdtemp()

        extract(st)

        # Cleanup lives in a finally clause: without it, any failing
        # assertion or load below would skip clean_inst/clean_true.
        try:
            # check no of files
            self.assertEqual(len(st.dev_true_fns), len(st.dev_part_fns))
            self.assertEqual(len(st.dev_inst_fns), len(st.dev_part_fns))

            # test loading a corpus file
            corpus = ParallelGraphCorpus(inf=st.dev_true_fns[0])

            # test loading an instances file
            inst = CorpusInst()
            inst.loadtxt(st.dev_inst_fns[0], st.descriptor.dtype)
            self.assertEqual(len(corpus), len(inst))
        finally:
            clean_inst(st)
            clean_true(st)
Esempio n. 6
0
    def test_extract_val_binary(self):
        """
        Run extract() with ``develop`` off and ``binary`` on and check the
        ``val_*`` outputs.

        Verifies that the numbers of true-corpus files and instances files
        both equal the number of part files, and that the first corpus file
        and the first binary instances file load with equal lengths.
        """
        st = create_setting()
        st.develop = False
        # create tmp dirs for extraction output
        st.inst_dir = tempfile.mkdtemp()
        st.true_dir = tempfile.mkdtemp()
        st.binary = True

        extract(st)

        # Cleanup lives in a finally clause: without it, any failing
        # assertion or load below would skip clean_inst/clean_true.
        try:
            # check no of files
            self.assertEqual(len(st.val_true_fns), len(st.val_part_fns))
            self.assertEqual(len(st.val_inst_fns), len(st.val_part_fns))

            # test loading a corpus file
            corpus = ParallelGraphCorpus(inf=st.val_true_fns[0])

            # test loading an instances file
            inst = CorpusInst()
            inst.loadbin(st.val_inst_fns[0])
            self.assertEqual(len(corpus), len(inst))
        finally:
            clean_inst(st)
            clean_true(st)