def main():
    """Round-trip every input file through conversion and print evaluation scores.

    Command-line arguments are parsed here. Each positional argument is expanded
    as a glob pattern. Every matching file is read with the converter pair
    registered in CONVERTERS for its extension (falling back to the "amr" entry),
    turned into passages, converted back, and the result is evaluated against the
    original using the matching EVALUATORS entry. When --out-dir is given, the
    passage XML and the re-converted text are both written into that directory.

    Raises:
        IOError: if a pattern does not match any file.
        ValueError: if converting a passage back, or evaluating it, fails; the
            original exception is chained as the cause.

    The function ends by calling sys.exit(0).
    """
    argparser = configargparse.ArgParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    add_verbose_arg(argparser, help="detailed evaluation output")
    add_boolean_option(argparser, "wikification", "Spotlight to wikify any named node (for AMR)")
    argparser.add_argument("-o", "--out-dir", help="output directory (if unspecified, files are not written)")
    args = argparser.parse_args()

    out_dir = args.out_dir
    if out_dir:
        # Create the output directory once up front rather than once per passage.
        os.makedirs(out_dir, exist_ok=True)

    scores = []
    for pattern in args.filenames:
        filenames = glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            # "\r" with end="" keeps rewriting the same terminal line as a progress indicator.
            print("\rConverting '%s'" % filename, end="")
            if out_dir or args.verbose:
                # More output follows for this file, so finish the progress line first.
                print(flush=True)
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            converters = CONVERTERS.get(passage_format, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(passage_format, EVALUATORS["amr"]).evaluate
            with open(filename, encoding="utf-8") as f:
                for passage, ref, passage_id in converters[0](f, passage_id=basename, return_original=True):
                    if out_dir:
                        outfile = os.path.join(out_dir, "%s.xml" % passage.ID)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if out_dir:
                        outfile = os.path.join(out_dir, "%s%s" % (passage.ID, ext))
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        # Only verbosity levels above 1 are forwarded to the evaluator.
                        s = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    scores.append(s)
                    if args.verbose:
                        print(passage_id)
                        s.print()
    # Terminate the in-place progress line before the summary.
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores(scores).print()
    sys.exit(0)
def add_convert_args(p):
    """Register the shared conversion options on the argument parser `p`.

    Adds four boolean options (test, tree, split, mark-aux), each with a
    one-letter short form, followed by the --label-map option.
    """
    boolean_flags = (
        ("test", "omit prediction columns (head and deprel for conll; top, pred, frame, etc. for sdp)", "t"),
        ("tree", "remove multiple parents to get a tree", "T"),
        ("split", "split each sentence to its own passage", "s"),
        ("mark-aux", "mark auxiliary edges introduced/omit edges", "m"),
    )
    # Registration order is kept as listed so the options appear in the same order in help output.
    for flag, description, letter in boolean_flags:
        add_boolean_option(p, flag, description, short=letter)
    p.add_argument("--label-map", help="CSV file specifying mapping of input edge labels to output edge labels")
def __init__(self, *args):
    """Define every command-line option, parse the arguments and set up initial state.

    :param args: argument strings to parse; when none are given, None is passed to
                 parse_args so that the parser falls back to its own default source

    After parsing, the output directory is created when there are passages to parse
    and writing is enabled, and unset log/score file names are derived from the
    first model basename (the log defaults to "parse.log" when no model is given).
    """
    self.arg_parser = ap = ArgParser(description="Transition-based parser for UCCA.",
                                     formatter_class=ArgumentDefaultsHelpFormatter)
    ap.add_argument("passages", nargs="*", help="passage files/directories to test on/parse")
    ap.add_argument("--version", action="version", version="")
    ap.add_argument("-C", "--config", is_config_file=True, help="configuration file to get arguments from")
    # The closing parenthesis of "(default: ...)" was missing from this help text.
    ap.add_argument("-m", "--models", nargs="+", help="model file basename(s) to load/save, ensemble if >1 "
                                                      "(default: <format>_<model_type>)")
    ap.add_argument("-c", "--classifier", choices=CLASSIFIERS, default=BIRNN, help="model type")
    ap.add_argument("-B", "--beam", type=int, choices=(1,), default=1, help="beam size for beam search")
    add_boolean_option(ap, "evaluate", "evaluation of parsed passages", short="e")
    add_verbose_arg(ap, help="detailed parse output")
    constructions.add_argument(ap)
    add_boolean_option(ap, "sentences", "split to sentences")
    add_boolean_option(ap, "paragraphs", "split to paragraphs")
    ap.add_argument("--timeout", type=float, help="max number of seconds to wait for a single passage")

    group = ap.add_argument_group(title="Training parameters")
    group.add_argument("-t", "--train", nargs="+", default=(), help="passage files/directories to train on")
    group.add_argument("-d", "--dev", nargs="+", default=(), help="passage files/directories to tune on")
    group.add_argument("-I", "--iterations", nargs="+", type=Iterations,
                       default=(Iterations(50), Iterations("100 --optimizer=" + EXTRA_TRAINER)),
                       help="number of training iterations along with optional hyperparameters per part")
    group.add_argument("--folds", type=int, choices=(3, 5, 10), help="#folds for cross validation")
    group.add_argument("--seed", type=int, default=1, help="random number generator seed")
    add_boolean_option(group, "early-update", "early update procedure (finish example on first error)")
    group.add_argument("--save-every", type=int, help="every this many passages, evaluate on dev and save model")
    add_boolean_option(group, "eval-test", "evaluate on test whenever evaluating on dev, but keep results hidden")
    add_boolean_option(group, "ignore-case", "pre-convert all input files to lower-case in training and test")

    group = ap.add_argument_group(title="Output files")
    group.add_argument("-o", "--outdir", default=".", help="output directory for parsed files")
    group.add_argument("-p", "--prefix", default="", help="output filename prefix")
    add_boolean_option(group, "write", "writing parsed output to files", default=True, short_no="W")
    group.add_argument("-j", "--join", help="if output format is textual, write all to one file with this basename")
    group.add_argument("-l", "--log", help="output log file (default: model filename + .log)")
    group.add_argument("--devscores", help="output CSV file for dev scores (default: model filename + .dev.csv)")
    group.add_argument("--testscores", help="output CSV file for test scores (default: model filename + .test.csv)")
    group.add_argument("--action-stats", help="output CSV file for action statistics")
    add_boolean_option(group, "normalize", "apply normalizations to output in case format is UCCA", default=False)
    ap.add_argument("-f", "--formats", nargs="+", choices=FILE_FORMATS, default=(),
                    help="input formats for creating all parameters before training starts "
                         "(otherwise created dynamically based on filename suffix), "
                         "and output formats for written files (each will be written; default: UCCA XML)")
    ap.add_argument("-u", "--unlabeled", nargs="*", choices=FORMATS, help="to ignore labels in")
    ap.add_argument("--lang", default="en", help="two-letter language code to use as the default language")
    add_boolean_option(ap, "multilingual", "separate model parameters per language (passage.attrib['lang'])")

    group = ap.add_argument_group(title="Sanity checks")
    add_boolean_option(group, "check-loops", "check for parser state loop")
    add_boolean_option(group, "verify", "check for oracle reproducing original passage")
    add_boolean_option(group, "validate-oracle", "require oracle output to respect constraints", default=True)
    add_param_arguments(ap)

    group = ap.add_argument_group(title="DyNet parameters")
    group.add_argument("--dynet-mem", help="memory for dynet")
    group.add_argument("--dynet-weight-decay", type=float, default=1e-5, help="weight decay for parameters")
    add_boolean_option(group, "dynet-apply-weight-decay-on-load", "workaround for clab/dynet#1206", default=False)
    add_boolean_option(group, "dynet-gpu", "GPU for training")
    group.add_argument("--dynet-gpus", type=int, default=1, help="how many GPUs you want to use")
    add_boolean_option(group, "dynet-autobatch", "auto-batching of training examples")
    # Record the names of the options registered in the DyNet group.
    DYNET_ARG_NAMES.update(get_group_arg_names(group))

    ap.add_argument("-H", "--hyperparams", type=HyperparamsInitializer.action, nargs="*",
                    help="shared hyperparameters or hyperparameters for specific formats, "
                         'e.g., "shared --lstm-layer-dim=100 --lstm-layers=1" "ucca --word-dim=300"',
                    default=[HyperparamsInitializer.action("shared --lstm-layers 2")])
    ap.add_argument("--copy-shared", nargs="*", choices=FORMATS, help="formats whose parameters shall be "
                                                                      "copied from loaded shared parameters")
    # An empty *args is turned into None rather than an empty argument list.
    self.args = FallbackNamespace(ap.parse_args(args if args else None))

    if self.args.config:
        print("Loading configuration from '%s'." % self.args.config)

    if self.args.passages and self.args.write:
        os.makedirs(self.args.outdir, exist_ok=True)

    # Fill in log/score file names that were not given explicitly.
    if self.args.models:
        if not self.args.log:
            self.args.log = self.args.models[0] + ".log"
        if self.args.dev and not self.args.devscores:
            self.args.devscores = self.args.models[0] + ".dev.csv"
        if self.args.passages and not self.args.testscores:
            self.args.testscores = self.args.models[0] + ".test.csv"
    elif not self.args.log:
        self.args.log = "parse.log"

    self.sub_configs = []  # Copies to be stored in Models so that they do not interfere with each other
    self._logger = self.format = self.hyperparams = self.iteration_hyperparams = None
    self._vocab = {}
    self.original_values = {}
    self.random = np.random
    self.update()
def __init__(self, *args):
    """Define every command-line option, parse the arguments and set up initial state.

    :param args: argument strings to parse; when none are given, None is passed to
                 parse_args so that the parser falls back to its own default source

    The BERT-related options are registered first, followed by the general,
    training, output, sanity-check and DyNet options. After parsing, the output
    directory is created when there are passages to parse and writing is enabled,
    and unset log/score file names are derived from the first model basename
    (the log defaults to "parse.log" when no model is given).
    """
    self.arg_parser = ap = ArgParser(description="Transition-based parser for UCCA.",
                                     formatter_class=ArgumentDefaultsHelpFormatter)

    add_boolean_option(ap, "use-bert", default=False, description="whether to use bert embeddings")
    ap.add_argument("--bert-model",
                    choices=["bert-base-uncased", "bert-large-uncased", "bert-base-cased",
                             "bert-large-cased", "bert-base-multilingual-cased"],
                    default="bert-base-multilingual-cased")
    ap.add_argument("--bert-layers", type=int, nargs='+', default=[-1, -2, -3, -4])
    ap.add_argument("--bert-layers-pooling", choices=["weighted", "sum", "concat"], default="weighted")
    ap.add_argument("--bert-token-align-by", choices=["first", "sum", "mean"], default="sum")
    ap.add_argument("--bert-multilingual", choices=[0], type=int)
    add_boolean_option(ap, "bert-use-default-word-embeddings", default=False,
                       description="whether to use default word embeddings")
    # Exact tenths 0.0 .. 0.9. np.linspace(0, 0.9, num=10) yields values such as
    # 0.30000000000000004, which do not compare equal to float("0.3"), so values
    # like "--bert-dropout 0.3" were rejected as invalid choices.
    ap.add_argument("--bert-dropout", type=float, default=0, choices=[i / 10 for i in range(10)])

    ap.add_argument("passages", nargs="*", help="passage files/directories to test on/parse")
    ap.add_argument("--version", action="version", version="")
    ap.add_argument("-C", "--config", is_config_file=True, help="configuration file to get arguments from")
    # The closing parenthesis of "(default: ...)" was missing from this help text.
    ap.add_argument("-m", "--models", nargs="+", help="model file basename(s) to load/save, ensemble if >1 "
                                                      "(default: <format>_<model_type>)")
    ap.add_argument("-c", "--classifier", choices=CLASSIFIERS, default=BIRNN, help="model type")
    ap.add_argument("-B", "--beam", type=int, choices=(1,), default=1, help="beam size for beam search")
    add_boolean_option(ap, "evaluate", "evaluation of parsed passages", short="e")
    add_verbose_arg(ap, help="detailed parse output")
    constructions.add_argument(ap)
    add_boolean_option(ap, "sentences", "split to sentences")
    add_boolean_option(ap, "paragraphs", "split to paragraphs")
    ap.add_argument("--timeout", type=float, help="max number of seconds to wait for a single passage")

    group = ap.add_argument_group(title="Training parameters")
    group.add_argument("-t", "--train", nargs="+", default=(), help="passage files/directories to train on")
    group.add_argument("-d", "--dev", nargs="+", default=(), help="passage files/directories to tune on")
    group.add_argument("-I", "--iterations", nargs="+", type=Iterations,
                       default=(Iterations(50), Iterations("100 --optimizer=" + EXTRA_TRAINER)),
                       help="number of training iterations along with optional hyperparameters per part")
    group.add_argument("--folds", type=int, choices=(3, 5, 10), help="#folds for cross validation")
    group.add_argument("--seed", type=int, default=1, help="random number generator seed")
    add_boolean_option(group, "early-update", "early update procedure (finish example on first error)")
    group.add_argument("--save-every", type=int, help="every this many passages, evaluate on dev and save model")
    add_boolean_option(group, "eval-test", "evaluate on test whenever evaluating on dev, but keep results hidden")
    add_boolean_option(group, "ignore-case", "pre-convert all input files to lower-case in training and test")

    group = ap.add_argument_group(title="Output files")
    group.add_argument("-o", "--outdir", default=".", help="output directory for parsed files")
    group.add_argument("-p", "--prefix", default="", help="output filename prefix")
    add_boolean_option(group, "write", "writing parsed output to files", default=True, short_no="W")
    group.add_argument("-j", "--join", help="if output format is textual, write all to one file with this basename")
    group.add_argument("-l", "--log", help="output log file (default: model filename + .log)")
    group.add_argument("--devscores", help="output CSV file for dev scores (default: model filename + .dev.csv)")
    group.add_argument("--testscores", help="output CSV file for test scores (default: model filename + .test.csv)")
    group.add_argument("--action-stats", help="output CSV file for action statistics")
    add_boolean_option(group, "normalize", "apply normalizations to output in case format is UCCA", default=False)
    ap.add_argument("-f", "--formats", nargs="+", choices=FILE_FORMATS, default=(),
                    help="input formats for creating all parameters before training starts "
                         "(otherwise created dynamically based on filename suffix), "
                         "and output formats for written files (each will be written; default: UCCA XML)")
    ap.add_argument("-u", "--unlabeled", nargs="*", choices=FORMATS, help="to ignore labels in")
    ap.add_argument("--lang", default="en", help="two-letter language code to use as the default language")
    add_boolean_option(ap, "multilingual", "separate model parameters per language (passage.attrib['lang'])")

    group = ap.add_argument_group(title="Sanity checks")
    add_boolean_option(group, "check-loops", "check for parser state loop")
    add_boolean_option(group, "verify", "check for oracle reproducing original passage")
    add_boolean_option(group, "validate-oracle", "require oracle output to respect constraints", default=True)
    add_param_arguments(ap)

    group = ap.add_argument_group(title="DyNet parameters")
    group.add_argument("--dynet-mem", help="memory for dynet")
    group.add_argument("--dynet-weight-decay", type=float, default=1e-5, help="weight decay for parameters")
    add_boolean_option(group, "dynet-apply-weight-decay-on-load", "workaround for clab/dynet#1206", default=False)
    add_boolean_option(group, "dynet-gpu", "GPU for training")
    group.add_argument("--dynet-gpus", type=int, default=1, help="how many GPUs you want to use")
    add_boolean_option(group, "dynet-autobatch", "auto-batching of training examples")
    add_boolean_option(group, "dynet-check-validity", "check validity of expressions immediately")
    # Record the names of the options registered in the DyNet group.
    DYNET_ARG_NAMES.update(get_group_arg_names(group))

    ap.add_argument("-H", "--hyperparams", type=HyperparamsInitializer.action, nargs="*",
                    help="shared hyperparameters or hyperparameters for specific formats, "
                         'e.g., "shared --lstm-layer-dim=100 --lstm-layers=1" "ucca --word-dim=300"',
                    default=[HyperparamsInitializer.action("shared --lstm-layers 2")])
    ap.add_argument("--copy-shared", nargs="*", choices=FORMATS, help="formats whose parameters shall be "
                                                                      "copied from loaded shared parameters")
    # An empty *args is turned into None rather than an empty argument list.
    self.args = FallbackNamespace(ap.parse_args(args if args else None))

    if self.args.config:
        print("Loading configuration from '%s'." % self.args.config)

    if self.args.passages and self.args.write:
        os.makedirs(self.args.outdir, exist_ok=True)

    # Fill in log/score file names that were not given explicitly.
    if self.args.models:
        if not self.args.log:
            self.args.log = self.args.models[0] + ".log"
        if self.args.dev and not self.args.devscores:
            self.args.devscores = self.args.models[0] + ".dev.csv"
        if self.args.passages and not self.args.testscores:
            self.args.testscores = self.args.models[0] + ".test.csv"
    elif not self.args.log:
        self.args.log = "parse.log"

    self.sub_configs = []  # Copies to be stored in Models so that they do not interfere with each other
    self._logger = self.format = self.hyperparams = self.iteration_hyperparams = None
    self._vocab = {}
    self.original_values = {}
    self.random = np.random
    self.update()
help="default format (if cannot determine by suffix)") argparser.add_argument( "-o", "--out-file", help="file to write results for each evaluated passage to in CSV format" ) argparser.add_argument( "-s", "--summary-file", help="file to write aggregated scores to, in CSV format") argparser.add_argument( "-c", "--counts-file", help="file to write aggregated counts to, in CSV format") add_boolean_option(argparser, "unlabeled", "print unlabeled F1 for individual passages", short="u") add_boolean_option(argparser, "enhanced", "read enhanced dependencies", default=True) add_boolean_option(argparser, "normalize", "normalize passages before evaluation", short="N", default=True) add_boolean_option(argparser, "matching-ids", "skip passages without a match (by ID)", short="i") add_boolean_option(argparser,
if __name__ == "__main__":
    # Script entry point: declare the command-line interface, then hand the
    # checked arguments to main().
    argparser = configargparse.ArgParser(description=desc)
    argparser.add_argument(
        "filenames",
        nargs="+",
        help="file names to convert and evaluate",
    )
    argparser.add_argument(
        "-f", "--format",
        choices=CONVERTERS,
        default="amr",
        help="default format (if cannot determine by suffix)",
    )
    add_verbose_arg(argparser, help="detailed evaluation output")
    argparser.add_argument(
        "--units",
        action="store_true",
        help="print mutual and unique units",
    )
    add_boolean_option(argparser, "wikification", "Spotlight to wikify any named node (for AMR)")
    argparser.add_argument(
        "-o", "--out-dir",
        help="output directory (if unspecified, files are not written)",
    )
    argparser.add_argument(
        "-n", "--normalize",
        action="store_true",
        help="normalize passages before conversion",
    )
    argparser.add_argument(
        "-e", "--extra-normalization",
        action="store_true",
        help="more normalization rules",
    )
    # Parsed arguments are passed through check_args before being used.
    args = check_args(argparser, argparser.parse_args())
    main(args)
from ucca import visualization from ucca.ioutil import get_passages_with_progress_bar, external_write_mode from ucca.normalization import normalize from semstr.cfgutil import add_boolean_option from semstr.convert import FROM_FORMAT, map_labels FROM_FORMAT["txt"] = FROM_FORMAT["amr"] if __name__ == "__main__": argparser = ArgumentParser( description="Visualize the given passages as graphs.") argparser.add_argument("passages", nargs="+", help="Passages in any format") add_boolean_option(argparser, "tikz", "print tikz code rather than showing plots") argparser.add_argument( "--out-dir", help="directory to save figures in (otherwise displayed immediately)") add_boolean_option(argparser, "normalize", "normalize passage", default=True) add_boolean_option(argparser, "extra-normalization", "more normalization rules") add_boolean_option(argparser, "enhanced", "read enhanced dependencies", default=True) argparser.add_argument( "--label-map",
add_boolean_option(p, "test", "omit prediction columns (head and deprel for conll; top, pred, frame, etc. for sdp)", short="t") add_boolean_option(p, "tree", "remove multiple parents to get a tree", short="T") add_boolean_option(p, "split", "split each sentence to its own passage", short="s") add_boolean_option(p, "mark-aux", "mark auxiliary edges introduced/omit edges", short="m") p.add_argument("--label-map", help="CSV file specifying mapping of input edge labels to output edge labels") if __name__ == '__main__': argparser = configargparse.ArgParser(description=description) argparser.add_argument("filenames", nargs="+", help="file names to convert") argparser.add_argument("-i", "--input-format", choices=CONVERTERS, help="input file format (detected by extension)") argparser.add_argument("-f", "--output-format", choices=CONVERTERS, help="output file format (default: UCCA)") argparser.add_argument("-o", "--out-dir", default=".", help="output directory") argparser.add_argument("-j", "--join", help="concatenate all output files to a file with this name") argparser.add_argument("-p", "--prefix", default="", help="output passage ID prefix") add_boolean_option(argparser, "binary", "write in binary format (.%s)" % UCCA_EXT[1], short="b") add_boolean_option(argparser, "annotate", "store dependency annotations in 'extra' dict", short="a") add_boolean_option(argparser, "validate", "validate every passage after conversion", short="V") add_boolean_option(argparser, "ucca-validation", "apply UCCA-specific validations", short="u") add_boolean_option(argparser, "enhanced", "read enhanced dependencies", default=True) add_boolean_option(argparser, "wikification", "AMR wikification", default=True) argparser.add_argument("--default-label", help="use this for missing AMR labels, otherwise raise exception") add_boolean_option(argparser, "normalize", "normalize passage", default=True) add_boolean_option(argparser, "extra-normalization", "more normalization rules") 
argparser.add_argument("-l", "--lang", help="small two-letter language code to set in output passage metadata") add_convert_args(argparser) add_verbose_arg(argparser, help="detailed output") main(argparser.parse_args()) sys.exit(0)