def setUp(self):
    """Prepare the fixture used by the tests.

    Opens the parallel graph corpus at the hard-coded path and keeps its
    first item as ``self.graph_pair``.  Also builds an ``Extractor`` from
    a ``Descriptor`` of ``cornet_sim``, passing ``select_visible_node`` as
    node selector, and stores it as ``self.feat_extr``.
    """
    # NOTE: the path is relative, so it depends on the working directory
    # the tests are started from.
    self.graph_pair = ParallelGraphCorpus(
        inf="../exp/corpora/news/pgc/ma/2006-11/news-2006-11-aligned-part-00.pgc")[0]
    self.feat_extr = Extractor(
        Descriptor(cornet_sim),
        node_selector=select_visible_node)
def test_align_count(self):
    """Check the alignment count features of three hand-picked instances.

    Extracts the ``term_align`` features from ``self.corpus[0]``, dumps
    them through ``self.dump`` and compares the five count features of
    instances 0, 2 and 25 with the values listed below.
    """
    extractor = Extractor(
        Descriptor(term_align),
        node_selector=select_visible_node)
    pair = self.corpus[0]
    instances = extractor.extract(pair)
    self.dump(pair, instances)

    # Feature names, in the order in which they are checked.
    feat_names = (
        "align_inside_count",
        "source_align_outside_count",
        "target_align_outside_count",
        "source_align_none_count",
        "target_align_none_count",
    )

    # A couple of "interesting" instances: (instance index, expected
    # values in the order of feat_names).
    expected = (
        # source: 0: top: Posters Partij voor de Dieren verwijderd
        # target: 0: top: Zeeland verwijdert posters Partij voor de Dieren
        (0, (6, 0, 0, 0, 1)),
        # source: 0: top: Posters Partij voor de Dieren verwijderd
        # target: 2: name: Zeeland
        (2, (0, 6, 0, 0, 1)),
        # source: 2: noun: Posters
        # target: 3: verb: verwijdert
        (25, (0, 1, 1, 0, 0)),
    )

    for index, values in expected:
        for feat_name, value in zip(feat_names, values):
            self.assertEqual(instances[index][feat_name], value)
def test_align_count(self):
    """Check the alignment count features of three hand-picked instances.

    Extracts the ``term_align`` features from ``self.corpus[0]``, dumps
    them through ``self.dump`` and compares the five count features of
    instances 0, 2 and 25 with the values listed below.
    """
    extractor = Extractor(
        Descriptor(term_align),
        node_selector=select_visible_node)
    pair = self.corpus[0]
    instances = extractor.extract(pair)
    self.dump(pair, instances)

    # Feature names, in the order in which they are checked.
    feat_names = (
        "align_inside_count",
        "source_align_outside_count",
        "target_align_outside_count",
        "source_align_none_count",
        "target_align_none_count",
    )

    # A couple of "interesting" instances: (instance index, expected
    # values in the order of feat_names).
    expected = (
        # source: 0: top: Posters Partij voor de Dieren verwijderd
        # target: 0: top: Zeeland verwijdert posters Partij voor de Dieren
        (0, (6, 0, 0, 0, 1)),
        # source: 0: top: Posters Partij voor de Dieren verwijderd
        # target: 2: name: Zeeland
        (2, (0, 6, 0, 0, 1)),
        # source: 2: noun: Posters
        # target: 3: verb: verwijdert
        (25, (0, 1, 1, 0, 0)),
    )

    for index, values in expected:
        for feat_name, value in zip(feat_names, values):
            self.assertEqual(instances[index][feat_name], value)
def __init__(self, descriptor=None, extractor=None, classifier=None,
             matcher=None):
    """Store the four components, creating a default for each falsy one.

    Any argument that is falsy (for instance left at ``None``) is
    replaced: ``descriptor`` by ``Descriptor()``, ``extractor`` by an
    ``Extractor`` built on the resulting descriptor, ``classifier`` by
    ``Classifier()`` and ``matcher`` by a ``Matcher`` that is given the
    descriptor's ``no_rel`` attribute.
    """
    # The descriptor is settled first, because the default extractor and
    # the default matcher are both derived from it.
    if not descriptor:
        descriptor = Descriptor()
    self.descriptor = descriptor

    if not extractor:
        extractor = Extractor(self.descriptor)
    self.extractor = extractor

    if not classifier:
        classifier = Classifier()
    self.classifier = classifier

    if not matcher:
        matcher = Matcher(no_rel=self.descriptor.no_rel)
    self.matcher = matcher
class TestCornetFeatures(unittest.TestCase):
    """Tests that extract the ``cornet_sim`` features from one graph pair."""

    def setUp(self):
        """Load the first graph pair and build the feature extractor.

        The corpus is read from a hard-coded relative path; the extractor
        uses a ``Descriptor`` of ``cornet_sim`` and is given
        ``select_visible_node`` as node selector.
        """
        self.graph_pair = ParallelGraphCorpus(
            inf="../exp/corpora/news/pgc/ma/2006-11/news-2006-11-aligned-part-00.pgc")[0]
        self.feat_extr = Extractor(
            Descriptor(cornet_sim),
            node_selector=select_visible_node)

    def test_cornet_server(self):
        """Extract after calling ``create_cornet_server_proxy(SERVER)``.

        The extracted instances are only printed, not verified.
        """
        create_cornet_server_proxy(SERVER)
        # single-argument form: prints the same under the print statement
        print(self.feat_extr.extract(self.graph_pair))
        # FIXME add asserts

    def test_cornet_load(self):
        """Extract after calling ``load_cornet`` with the two CDB file names.

        The extracted instances are only printed, not verified.
        """
        print("\n(Loading Cornetto database - may take a long time...)")
        load_cornet(CDB_LU_FNAME, CDB_SYN_FNAME)
        print(self.feat_extr.extract(self.graph_pair))
def test_init_2(self):
    """
    Adding a new feature.

    Defines a feature function and a graph hook locally, wraps them in a
    ``Feat``, builds a ``Descriptor`` and an ``Extractor`` from it and
    prints the result of the descriptor's ``pprint()``.
    """
    def ff_new_feat(**kwargs):
        return 1

    def pp_hook1():
        pass

    # Explicit one-element tuple: the descriptor receives a tuple holding
    # the feature, not the bare Feat object.
    new_feats = (Feat(ff_new_feat, "i", pp_graph_hooks=[pp_hook1]),)
    feat_extr = Extractor(Descriptor(new_feats))
    # single-argument form: prints the same under the print statement
    print(feat_extr.descriptor.pprint())
def test_init_1(self):
    """Print the descriptor of an Extractor built on ``Descriptor()``."""
    default_descriptor = Descriptor()
    # single-argument form: prints the same under the print statement
    print(Extractor(default_descriptor).descriptor.pprint())
def extractor(self):
    """Return the extractor, creating it on first access.

    The instance is kept in ``self._extractor``.  When that attribute
    does not exist yet, an ``Extractor`` is built from ``self.descriptor``
    and ``self.node_selector`` and stored there before being returned.
    """
    try:
        cached = self._extractor
    except AttributeError:
        # first access: build the extractor and remember it
        self._extractor = Extractor(self.descriptor, self.node_selector)
        cached = self._extractor
    return cached