Esempio n. 1
0
    def test_align(self):
        """
        Align the first three entries of the dev001 corpus and check that
        at least one of them reports alignments afterwards.
        """
        # graph aligner: descriptor from the experiment setting, classifier
        # constructed with the dev001 instance file path
        descriptor = create_setting().descriptor
        graph_aligner = GraphAligner(
            descriptor=descriptor,
            classifier=TimblClassifier(descriptor, "exp/inst/dev001.inst"))

        # corpus aligner wrapping the graph aligner
        corpus_aligner = CorpusAligner(
            graph_aligner=graph_aligner,
            graph_selector=select_parsed_graph_pair)

        # load the corpus, keep only its first three entries, and align
        # them with clear=True (the second slice is kept as in the
        # original call)
        corpus = ParallelGraphCorpus(inf="exp/true/dev001_true.pgc")[:3]
        corpus_aligner.align(corpus[:3], clear=True)

        # stop at the first entry whose alignments() result is truthy;
        # if none is, the result of the last entry is what gets asserted
        found = None
        for index in range(3):
            found = corpus[index].alignments()
            if found:
                break

        self.assertTrue(found)
Esempio n. 2
0
                    "--config",
                    metavar="FILE",
                    help="configuration file to set up a corpus aligner")

parser.add_argument(
    "-x", "--clear",
    action="store_true",
    help="remove all existing alignments")

parser.add_argument(
    "-i", "--in-place",
    action="store_true",
    help="modify input file(s)")

args = parser.parse_args()

# Without a configuration file, fall back to a default-constructed corpus
# aligner; CorpusAligner is imported only on that path.
if not args.config:
    from daeso_nl.ga.corpus import CorpusAligner
    corpus_aligner = CorpusAligner()
else:
    # load the configuration file as a module named "config" and build the
    # aligner from it
    config = imp.load_source("config", args.config)
    corpus_aligner = set_up_corpus_aligner(config)

# align every corpus file matched by the given patterns, one at a time
for inf in multiglob(args.pgc_files):
    corpus = ParallelGraphCorpus(inf=inf)
    corpus_aligner.align(corpus, clear=args.clear)

    # output is always pretty-printed; the input file name is passed as
    # "outf" only with --in-place, otherwise the destination is whatever
    # corpus.write uses when "outf" is omitted
    write_options = {"pprint": True}
    if args.in_place:
        write_options["outf"] = inf
    corpus.write(**write_options)