from glob import glob from sys import stderr from xml.etree.cElementTree import ElementTree, tostring from daeso.utils.cli import DaesoArgParser from daeso.utils.etree import write # TODO: # - this is not too smart yet - you can do this with sed, # but escaping all the slashes is a pain # - windows support parser = DaesoArgParser(description=__doc__) parser.add_argument("corpus", nargs="+", help="parallel graph corpus") parser.add_argument( "-p", "--path-prefix-pair", nargs=2, default=["", ""], metavar="DIR", help="a pair of path prefixes specifying what to change from and to", ) parser.add_argument( "-t", "--test", action="store_true", help="perform a dry run without changing anything for real (implies -V)"
The graphs must have an "id" attribute which is identical to the "id" of a corresponding sentence (<s> element, by default) in the marked-up text files. That is, a <links>'s "from_id" and "to_id" must identify corresponding graphs in the "from" and "to" graphbanks respectively. The graphbanks are assumed to be in GraphML format, unless specified otherwise by means of the --source-graphbank-format and --target-graphbank-format option. The default set of alignment relations for the parallel graph corpus is the Daeso set, but you can change it using the --relations option. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("-p", "--parallel-text-corpora", metavar="CORPUS", nargs="+", default=(), help='parallel text corpora') parser.add_argument("-s", "--source-graphbanks", metavar="GRAPHBANK", nargs="+", default=(), help='source graphbanks')
""" copy a parallel graph corpus Automatically takes care of the internal references to graph bank files. Usage is similar to the "cp" shell command. """

__authors__ = 'Erwin Marsi <*****@*****.**>'

from os.path import isdir, basename, join, samefile, exists
from sys import stderr

from daeso.utils.cli import DaesoArgParser
from daeso.pgc.corpus import ParallelGraphCorpus, LOAD_NONE

# Command line interface: one or more sources followed by a single target.
# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__)

parser.add_argument(
    "source",
    nargs="+",
    help="source parallel graph corpus file")

parser.add_argument(
    "target",
    help="either a target parallel graph corpus file or a target directory")

parser.add_argument(
    "-o", "--overwrite",
    action="store_true",
    help="overwrite existing file")

args = parser.parse_args()
The graphs must have an "id" attribute which is identical to the "id" of a corresponding sentence (<s> element, by default) in the marked-up text files. That is, a <links>'s "from_id" and "to_id" must identify corresponding graphs in the "from" and "to" graphbanks respectively. The graphbanks are assumed to be in GraphML format, unless specified otherwise by means of the --source-graphbank-format and --target-graphbank-format option. The default set of alignment relations for the parallel graph corpus is the Daeso set, but you can change it using the --relations option. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "-p", "--parallel-text-corpora", metavar="CORPUS", nargs="+", default=(), help='parallel text corpora') parser.add_argument( "-s", "--source-graphbanks", metavar="GRAPHBANK", nargs="+", default=(), help='source graphbanks')
# TODO:
# - silence warning about meta-data

__authors__ = 'Erwin Marsi <*****@*****.**>'

import sys

from daeso.utils.cli import DaesoArgParser
from daeso.utils.opsys import multiglob
from daeso.pgc.corpus import ParallelGraphCorpus

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__)

# positional: one or more corpus file names (or quoted patterns)
parser.add_argument(
    "file",
    nargs="+",
    metavar="FILE",
    help="parallel graph corpus filename, or quoted file name "
         "pattern for parallel graph corpora")

# flag: request indented XML output
parser.add_argument(
    "-f", "--format",
    action="store_true",
    help="output indented XML")
align graphs in parallel graph corpus """ __authors__ = 'Erwin Marsi <*****@*****.**>' __version__ = "0.9" import imp from daeso.utils.cli import DaesoArgParser from daeso.utils.opsys import multiglob from daeso.pgc.corpus import ParallelGraphCorpus from daeso_nl.ga.setup import set_up_corpus_aligner parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument( "pgc_files", nargs="+", metavar="FILE", help="parallel graph corpus file" ) parser.add_argument( "-c", "--config", metavar="FILE", help="configuration file to set up a corpus aligner") parser.add_argument(
# Base directory for corpora; an empty string when DAESO_CORPUS is not set.
corpus_dir = getenv("DAESO_CORPUS", "")

if not corpus_dir:
    # End the warning with a newline so that it does not run into whatever
    # is written to stderr next.
    stderr.write("Warning: environment variable DAESO_CORPUS not found!\n")


def expand_globs(corpus_dir, globs):
    """
    Expand filename patterns relative to a base directory.

    Calls relglob(corpus_dir, pattern) for every pattern in globs and
    returns all results concatenated in a single list, in pattern order.
    """
    files = []
    for pattern in globs:
        files.extend(relglob(corpus_dir, pattern))
    return files


parser = DaesoArgParser(description=__doc__, version=__version__)

parser.add_argument(
    "pgc_glob",
    nargs="+",
    # refer to the option by the name it is actually registered under
    help=("glob (i.e. filename pattern) for parallel graph corpora, "
          "interpreted relative to the corpus base directory "
          "(cf. --corpus-dir)"))

parser.add_argument(
    "-c", "--corpus-dir",
    default=corpus_dir,
    help="pgc filenames are interpreted relative to this base directory "
         "(default is '" + corpus_dir + "')")
#!/usr/bin/env python
# -*- coding: utf-8 -*-

""" simple server providing access to the Alpino parser for Dutch through XML-RPC """

__author__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.9"

from sys import exit

from daeso.utils.cli import DaesoArgParser
from daeso_nl.alpino.server import start_server, DEFAULT_HOST, DEFAULT_PORT

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(
    description=__doc__,
    version="%(prog)s version " + __version__)

# Address to serve on; the default is built from the server module's
# DEFAULT_HOST and DEFAULT_PORT constants.
parser.add_argument(
    "-H", "--host",
    default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT),
    metavar="HOST[:PORT]",
    help="name or IP address of host (default is '%s') "
         "optionally followed by a port number "
         "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT))

parser.add_argument(
    "-c", "--command",
    help="command line to start Alpino parser")

parser.add_argument(
    "-o", "--out_dir",
    help="directory for writing temporary files")
""" parallel text corpus diff reports difference in text alignments between two parallel text corpora """

from daeso.utils.cli import DaesoArgParser
from daeso.ptc.diff import print_diff

__authors__ = 'Erwin Marsi <*****@*****.**>'

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__)

parser.add_argument(
    "true_corpus",
    help="parallel text corpus containing true alignments")

parser.add_argument(
    "pred_corpus",
    help="parallel text corpus containing predicted alignments")

# Help text fixed: it read '(defaults is "s"' without a closing parenthesis.
parser.add_argument(
    "-t", "--tag",
    default="s",
    help='only consider alignments involving this tag (default is "s")')
# (at your option) any later version. # # The Algraeph program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. __author__ = "Erwin Marsi <*****@*****.**>" from daeso.utils.cli import DaesoArgParser from graeph.release import version, description parser = DaesoArgParser(description=description.strip(), version=version) parser.add_argument("corpus_file", metavar="FILE", nargs="?", help="parallel graph corpus file") parser.add_argument("-d", "--dot_exec", metavar="FILE", help='"dot" graph drawing program') parser.add_argument( "-r", "--redirect", action='store_true',
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__version__ = "1.0"
__author__ = "Erwin Marsi"

from htxt.gui import Hitaext
from daeso.utils.cli import DaesoArgParser

# Shown in the help text; surrounding whitespace is stripped below.
# Spelling fixed: "aligment" -> "alignment".
description = """ Hitaext: hierarchical text alignment tool """

parser = DaesoArgParser(description=description.strip(), version=__version__)

# The corpus file is optional (nargs="?").
parser.add_argument(
    "corpus_file",
    metavar="FILE",
    nargs="?",
    help="parallel text corpus file")

parser.add_argument(
    "-r", "--redirect",
    action='store_true',
    help="redirect output written to stdout and stderr streams "
         "to a pop-up window")

args = parser.parse_args()
__authors__ = 'Erwin Marsi <*****@*****.**>' from glob import glob from sys import stderr from xml.etree.cElementTree import ElementTree, tostring from daeso.utils.cli import DaesoArgParser from daeso.utils.etree import write # TODO: # - this is not too smart yet - you can do this with sed, # but escaping all the slashes is a pain # - windows support parser = DaesoArgParser(description=__doc__) parser.add_argument("corpus", nargs="+", help="parallel graph corpus") parser.add_argument( "-p", "--path-prefix-pair", nargs=2, default=["", ""], metavar="DIR", help="a pair of path prefixes specifying what to change from and to") parser.add_argument( "-t", "--test", action="store_true",
__authors__ = 'Erwin Marsi <*****@*****.**>' import os import sys from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus def log(s): if args.verbose: print >>sys.stderr, "***", s parser = DaesoArgParser(description=__doc__) parser.add_argument( "filename", metavar="FILE", help="parallel graph corpus" ) parser.add_argument( "-f", "--format", action="store_true", help="output indented XML" ) parser.add_argument( "-p", "--parts",
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__version__ = "1.0"
__author__ = "Erwin Marsi"

from htxt.gui import Hitaext
from daeso.utils.cli import DaesoArgParser

# Shown in the help text; surrounding whitespace is stripped below.
# Spelling fixed: "aligment" -> "alignment".
description = """ Hitaext: hierarchical text alignment tool """

parser = DaesoArgParser(description=description.strip(), version=__version__)

# The corpus file is optional (nargs="?").
parser.add_argument(
    "corpus_file",
    metavar="FILE",
    nargs="?",
    help="parallel text corpus file")

parser.add_argument(
    "-r", "--redirect",
    action='store_true',
    help="redirect output written to stdout and stderr streams "
         "to a pop-up window")

args = parser.parse_args()
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

""" dump instance base Converts a Timbl instance file to a Timbl instance base file given a graph aligner configuration. Timbl options and filenames are taken from the configuration file (cf. the "timbl_inst_fname" and "timbl_ib_fname" attributes). """

__authors__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.9"

import imp

from daeso.utils.cli import DaesoArgParser
from daeso_nl.ga.setup import dump_inst_base

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__, version=__version__)

parser.add_argument(
    "config",
    metavar="FILE",
    help="configuration file to set up a corpus aligner")

args = parser.parse_args()

# NOTE: imp.load_source runs the configuration file as Python source, so
# only point this script at configuration files you trust.
config = imp.load_source("config", args.config)

dump_inst_base(config)
__authors__ = 'Erwin Marsi <*****@*****.**>'

from glob import glob

from daeso.utils.cli import DaesoArgParser, epilog
from daeso.gb.gbstats import gb_stats

# Script-specific remarks are put in front of the epilog imported from
# daeso.utils.cli; the combined text rebinds the name "epilog".
epilog = """ Remarks: * Many columns will have zero values, because a parallel graph corpus is required to get alignment information; see pgc_stats.py * Failed parses will only be excluded for graph banks in 'alpino' format. """ + epilog

parser = DaesoArgParser(description=__doc__, epilog=epilog)

parser.add_argument(
    "pattern",
    help="*quoted* pattern for graph bank files")

parser.add_argument(
    "-F", "--format",
    metavar="STRING",
    default="alpino",
    dest="format",
    help="treebank format (defaults to 'alpino')")

parser.add_argument(
    "-a", "--with-all",
    action="store_true",
    dest="with_all",
    help="include all, sets options -efp")
# - handle encoding errors # - reset cache __author__ = 'Erwin Marsi <*****@*****.**>' __version__ = '0.9' import sys import socket from xmlrpclib import ServerProxy, Fault from daeso.utils.cli import DaesoArgParser from daeso_nl.alpino.server import DEFAULT_HOST, DEFAULT_PORT parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument( "-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " "optionally followed by a port number " "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT)) parser.add_argument( "-i", "--input-encoding", default="utf8", metavar="utf8|latin1|ascii|...",
copy a parallel graph corpus Automatically takes care of the internal references to graph bank files. Usage is similar to the "cp" shell command. """ __authors__ = 'Erwin Marsi <*****@*****.**>' from os.path import isdir, basename, join, samefile, exists from sys import stderr from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus, LOAD_NONE parser = DaesoArgParser(description=__doc__) parser.add_argument( "source", nargs="+", help="source parallel graph corpus file" ) parser.add_argument( "target", help="either a target parallel graph corpus file or " "a target directory" ) parser.add_argument(
zip_arch.write(corpus_filename, arch_filename) corpus = ParallelGraphCorpus(inf=corpus_filename, graph_loading=LOAD_NONE) for gb in corpus._graphbanks(): gb_filename = gb.get_file_path() # add graphbank files to archive arch_filename = os.path.join(arch_dir, os.path.basename(gb_filename)) zip_arch.write(gb_filename, arch_filename) zip_arch.close() parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("zip_file", metavar="ZIP_FILE", help="filename of zip archive") parser.add_argument("pgc_files", nargs="+", metavar="CORPUS_FILE", help="parallel graph corpus filename, " "or quoted file name pattern for parallel graph corpora") args = parser.parse_args() pgc_zip(args.zip_file, args.pgc_files)
from glob import glob from sys import exit from daeso.utils.cli import DaesoArgParser, epilog from daeso.pgc.pgcstats import pgc_stats epilog = """ Examples: $ pgc_stats.py -efpu "*.pgc" Remarks: * Failed parses will only be exluded for graph banks in 'alpino' format. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("pattern", help="*quoted* pattern for parallel graph corpus files") parser.add_argument("-a", "--with-all", action="store_true", dest="with_all", help="include all, sets options -efpru") #parser.add_argument("-c", "--csv", action="store_true", #dest="csv", #help="output in comma separated values") parser.add_argument("-e",
__authors__ = 'Erwin Marsi <*****@*****.**>'

import os
import sys

from daeso.utils.cli import DaesoArgParser
from daeso.pgc.corpus import ParallelGraphCorpus


def log(s):
    """
    Print s to stderr, prefixed with "***", when args.verbose is true.

    The module-level name "args" is looked up at call time, so it must be
    bound before the first call.
    """
    if not args.verbose:
        return
    print >> sys.stderr, "***", s


# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__)

parser.add_argument(
    "filename",
    metavar="FILE",
    help="parallel graph corpus")

parser.add_argument(
    "-f", "--format",
    action="store_true",
    help="output indented XML")

# number of parts, converted to int by the parser; two by default
parser.add_argument(
    "-p", "--parts",
    default=2,
    type=int,
    metavar="N",
    help="number of parts")
#!/usr/bin/env python # -*- coding: utf-8 -*- """ simple server providing access to the Alpino parser for Dutch through XML-RPC """ __author__ = 'Erwin Marsi <*****@*****.**>' __version__ = "0.9" from sys import exit from daeso.utils.cli import DaesoArgParser from daeso_nl.alpino.server import start_server, DEFAULT_HOST, DEFAULT_PORT parser = DaesoArgParser(description=__doc__, version="%(prog)s version " + __version__) parser.add_argument("-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " "optionally followed by a port number " "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT)) parser.add_argument("-c", "--command", help="command line to start Alpino parser") parser.add_argument("-o", "--out_dir",
zip_arch.write(corpus_filename, arch_filename) corpus = ParallelGraphCorpus(inf=corpus_filename, graph_loading=LOAD_NONE) for gb in corpus._graphbanks(): gb_filename = gb.get_file_path() # add graphbank files to archive arch_filename = os.path.join( arch_dir, os.path.basename(gb_filename) ) zip_arch.write(gb_filename, arch_filename) zip_arch.close() parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "zip_file", metavar="ZIP_FILE", help="filename of zip archive") parser.add_argument( "pgc_files", nargs="+", metavar="CORPUS_FILE", help="parallel graph corpus filename, " "or quoted file name pattern for parallel graph corpora") args = parser.parse_args()
""" evaluation of text alignment in parallel text corpora reports precision, recall and F-score on alignment for a certain tag for one or more pairs of true and predicted parallel text corpora """

from daeso.utils.cli import DaesoArgParser
from daeso.ptc.evaluate import eval_alignment

__authors__ = "Erwin Marsi <*****@*****.**>"

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__)

parser.add_argument(
    "-t", "--true_corpora",
    nargs="+",
    help="parallel text corpus containing true alignments")

parser.add_argument(
    "-p", "--pred_corpora",
    nargs="+",
    help="parallel text corpus containing predicted alignments")

# Help text fixed: it read '(defaults is "s"' without a closing parenthesis.
parser.add_argument(
    "--tag",
    default="s",
    help='only consider alignments involving this tag (default is "s")')

args = parser.parse_args()

# Both corpus options are optional for the parser, so either may be None
# here. Check explicitly rather than with assert, which is stripped under
# "python -O" and would otherwise fail with a TypeError on len(None).
if not args.true_corpora or not args.pred_corpora:
    raise SystemExit(
        "error: both -t/--true_corpora and -p/--pred_corpora are required")

# Corpora are paired by position, so the two lists must be equally long.
if len(args.true_corpora) != len(args.pred_corpora):
    raise SystemExit(
        "error: number of true corpora (%d) differs from "
        "number of predicted corpora (%d)"
        % (len(args.true_corpora), len(args.pred_corpora)))

eval_alignment(zip(args.true_corpora, args.pred_corpora), args.tag)
# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ parallel text corpus diff reports difference in text alignments between two parallel text corpora """ from daeso.utils.cli import DaesoArgParser from daeso.ptc.diff import print_diff __authors__ = 'Erwin Marsi <*****@*****.**>' parser = DaesoArgParser(description=__doc__) parser.add_argument("true_corpus", help="parallel text corpus containing true alignments") parser.add_argument( "pred_corpus", help="parallel text corpus containing predicted alignments") parser.add_argument( "-t", "--tag", default="s", help='only consider alignments involving this tag (defaults is "s"') parser.add_argument("-e", "--encoding",
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

""" dump instance base Converts a Timbl instance file to a Timbl instance base file given a graph aligner configuration. Timbl options and filenames are taken from the configuration file (cf. the "timbl_inst_fname" and "timbl_ib_fname" attributes). """

__authors__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.9"

import imp

from daeso.utils.cli import DaesoArgParser
from daeso_nl.ga.setup import dump_inst_base

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__, version=__version__)

parser.add_argument(
    "config",
    metavar="FILE",
    help="configuration file to set up a corpus aligner")

args = parser.parse_args()

# NOTE: imp.load_source runs the configuration file as Python source, so
# only point this script at configuration files you trust.
config = imp.load_source("config", args.config)

dump_inst_base(config)
""" __author__ = 'Erwin Marsi <*****@*****.**>' __version__ = "0.9" import imp import sys from daeso.utils.cli import DaesoArgParser from daeso_nl.ga.setup import set_up_align_server from daeso_nl.ga.server import (start_server, DEFAULT_HOST, DEFAULT_PORT) #------------------------------------------------------------------------------- parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument("config", metavar="FILE", help="configuration file to set up a graph align server") parser.add_argument( "-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " % DEFAULT_HOST + "optionally followed by a port number (default is %d)" % DEFAULT_PORT) parser.add_argument("-l", "--log", action="store_true", help="log requests")
if verbose: banks = graph_pair.get_banks() columns = [ banks.source.get_file_path(), banks.target.get_file_path(), graphs.source.id, graphs.target.id, nodes.source, nodes.target ] + columns print delimiter.join(columns).encode("utf-8") parser = DaesoArgParser(description=__doc__.strip()) parser.add_argument( "corpus", nargs="+", metavar="FILE", help="parallel graph corpus file" ) parser.add_argument( "-d", "--delimiter", default="\t", help="column delimiter string (default is tab character '\\t')" ) parser.add_argument(
from glob import glob from daeso.utils.cli import DaesoArgParser, epilog from daeso.gb.gbstats import gb_stats epilog = """ Remarks: * Many columns will have zero values, because a parallel graph corpus is required to get alignment information; see pgc_stats.py * Failed parses will only be excluded for graph banks in 'alpino' format. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "pattern", help="*quoted* pattern for graph bank files") parser.add_argument( "-F", "--format", metavar="STRING", default="alpino", dest="format", help="treebank format (defaults to 'alpino')") parser.add_argument( "-a", "--with-all", action="store_true",
graphs.target.get_node_token_string(nodes.target) ] if verbose: banks = graph_pair.get_banks() columns = [ banks.source.get_file_path(), banks.target.get_file_path(), graphs.source.id, graphs.target.id, nodes.source, nodes.target ] + columns print delimiter.join(columns).encode("utf-8") parser = DaesoArgParser(description=__doc__.strip()) parser.add_argument("corpus", nargs="+", metavar="FILE", help="parallel graph corpus file") parser.add_argument( "-d", "--delimiter", default="\t", help="column delimiter string (default is tab character '\\t')") parser.add_argument( "-V", "--verbose",
# The Algraeph program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. __author__ = "Erwin Marsi <*****@*****.**>" from daeso.utils.cli import DaesoArgParser from graeph.release import version, description parser = DaesoArgParser(description=description.strip(), version=version) parser.add_argument( "corpus_file", metavar="FILE", nargs="?", help="parallel graph corpus file") parser.add_argument( "-d", "--dot_exec", metavar="FILE", help='"dot" graph drawing program') parser.add_argument( "-r", "--redirect", action='store_true',
""" align graphs in parallel graph corpus """

__authors__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.9"

import imp

from daeso.utils.cli import DaesoArgParser
from daeso.utils.opsys import multiglob
from daeso.pgc.corpus import ParallelGraphCorpus
from daeso_nl.ga.setup import set_up_corpus_aligner

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__, version=__version__)

# positional: one or more corpus files
parser.add_argument(
    "pgc_files",
    nargs="+",
    metavar="FILE",
    help="parallel graph corpus file")

parser.add_argument(
    "-c", "--config",
    metavar="FILE",
    help="configuration file to set up a corpus aligner")

parser.add_argument(
    "-x", "--clear",
    action="store_true",
    help="remove all existing alignments")
""" parallel graph corpus diff reports difference in node alignments between two parallel graph corpora """ from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus from daeso.pgc.diff import pgc_diff __authors__ = 'Erwin Marsi <*****@*****.**>' parser = DaesoArgParser(description=__doc__) parser.add_argument( "corpus1", help="first parallel graph corpus") parser.add_argument( "corpus2", help="second parallel graph corpus") parser.add_argument( "-1", "--first_annotator", metavar="NAME", default="First annotator",
# Base directory for corpora; an empty string when DAESO_CORPUS is not set.
corpus_dir = getenv("DAESO_CORPUS", "")

if not corpus_dir:
    # End the warning with a newline so that it does not run into whatever
    # is written to stderr next.
    stderr.write("Warning: environment variable DAESO_CORPUS not found!\n")


def expand_globs(corpus_dir, globs):
    """
    Expand filename patterns relative to a base directory.

    Calls relglob(corpus_dir, pattern) for every pattern in globs and
    returns all results concatenated in a single list, in pattern order.
    """
    files = []
    for pattern in globs:
        files.extend(relglob(corpus_dir, pattern))
    return files


parser = DaesoArgParser(description=__doc__, version=__version__)

parser.add_argument(
    "pgc_glob",
    nargs="+",
    # refer to the option by the name it is actually registered under
    help=("glob (i.e. filename pattern) for parallel graph corpora, "
          "interpreted relative to the corpus base directory "
          "(cf. --corpus-dir)"))

parser.add_argument(
    "-c", "--corpus-dir",
    default=corpus_dir,
    help="pgc filenames are interpreted relative to this base directory "
         "(default is '" + corpus_dir + "')")
use pgc_diff.py """ # TODO: # - check for at least two input files __authors__ = 'Erwin Marsi <*****@*****.**>' from daeso.utils.cli import DaesoArgParser from daeso.pgc.agreement import run_eval parser = DaesoArgParser(description=__doc__) parser.add_argument( "corpus_fns", nargs="+", metavar="corpus", help="parallel graph corpus file (at least two are required)") parser.add_argument( "-a", "--annotator", dest="annotators", metavar="CC", action="append", help="initials of the annotator of a parallel graph corpus files " "(default is 'A1', 'A2', etc.) Repeat this option as many times as " "there are corpus files")
# TODO:
# - handle encoding errors
# - reset cache

__author__ = 'Erwin Marsi <*****@*****.**>'
__version__ = '0.9'

import sys
import socket
from xmlrpclib import ServerProxy, Fault

from daeso.utils.cli import DaesoArgParser
from daeso_nl.alpino.server import DEFAULT_HOST, DEFAULT_PORT
from daeso_nl.alpino.client import alpino_client

# The module docstring is reused as the description of the parser.
parser = DaesoArgParser(description=__doc__, version=__version__)

# Server address; the default is built from the server module's
# DEFAULT_HOST and DEFAULT_PORT constants.
parser.add_argument(
    "-H", "--host",
    default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT),
    metavar="HOST[:PORT]",
    help="name or IP address of host (default is '%s') "
         "optionally followed by a port number "
         "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT))

parser.add_argument(
    "-i", "--input-encoding",
    default="utf8",
    metavar="utf8|latin1|ascii|...",
    help="character encoding of input (default is utf8)")
import imp import sys from daeso.utils.cli import DaesoArgParser from daeso_nl.ga.setup import set_up_align_server from daeso_nl.ga.server import ( start_server, DEFAULT_HOST, DEFAULT_PORT ) #------------------------------------------------------------------------------- parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument( "config", metavar="FILE", help="configuration file to set up a graph align server") parser.add_argument( "-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " % DEFAULT_HOST + "optionally followed by a port number (default is %d)" % DEFAULT_PORT) parser.add_argument( "-l", "--log",
parallel graph corpora, and generates an analysis in terms of a number of statistics. For a more detailed analysis of the diferences between a pair of annotations, use pgc_diff.py """ # TODO: # - check for at least two input files __authors__ = 'Erwin Marsi <*****@*****.**>' from daeso.utils.cli import DaesoArgParser from daeso.pgc.agreement import run_eval parser = DaesoArgParser(description=__doc__) parser.add_argument( "corpus_fns", nargs="+", metavar="corpus", help="parallel graph corpus file (at least two are required)") parser.add_argument( "-a", "--annotator", dest="annotators", metavar="CC", action="append", help="initials of the annotator of a parallel graph corpus files " "(default is 'A1', 'A2', etc.) Repeat this option as many times as "
from daeso.utils.cli import DaesoArgParser, epilog
from daeso.pgc.pgcstats import pgc_stats

# Script-specific examples and remarks are put in front of the epilog
# imported from daeso.utils.cli; the combined text rebinds the name "epilog".
# Spelling fixed: "exluded" -> "excluded".
epilog = """ Examples: $ pgc_stats.py -efpu "*.pgc" Remarks: * Failed parses will only be excluded for graph banks in 'alpino' format. """ + epilog

parser = DaesoArgParser(description=__doc__, epilog=epilog)

parser.add_argument(
    "pattern",
    help="*quoted* pattern for parallel graph corpus files")

parser.add_argument(
    "-a", "--with-all",
    action="store_true",
    dest="with_all",
    help="include all, sets options -efpru")

#parser.add_argument("-c", "--csv", action="store_true",
                    #dest="csv",
                    #help="output in comma separated values")
""" evaluation of text alignment in parallel text corpora reports precision, recall and F-score on alignment for a certain tag for one or more pairs of true and predicted parallel text corpora """ from daeso.utils.cli import DaesoArgParser from daeso.ptc.evaluate import eval_alignment __authors__ = 'Erwin Marsi <*****@*****.**>' parser = DaesoArgParser(description=__doc__) parser.add_argument( "-t", "--true_corpora", nargs = "+", help="parallel text corpus containing true alignments") parser.add_argument( "-p", "--pred_corpora", nargs = "+", help="parallel text corpus containing predicted alignments") parser.add_argument( "--tag", default="s",
Meta-data of everything but the first corpus is discarded! """ # TODO: # - silence warning about meta-data __authors__ = 'Erwin Marsi <*****@*****.**>' import sys from daeso.utils.cli import DaesoArgParser from daeso.utils.opsys import multiglob from daeso.pgc.corpus import ParallelGraphCorpus parser = DaesoArgParser(description=__doc__) parser.add_argument("file", nargs="+", metavar="FILE", help="parallel graph corpus filename, " "or quoted file name pattern for parallel graph corpora") parser.add_argument("-f", "--format", action="store_true", help="output indented XML") parser.add_argument("-V", "--verbose", action="store_true",