def load_passage(filename, annotate=False):
    """Read the first passage available from the given file specification.

    Switches the module-level WIKIFIER off before reading, and pre-binds ``annotate`` on every
    converter found in FROM_FORMAT.

    :param filename: file/directory specification handed to ioutil.read_files_and_dirs
    :param annotate: value forwarded as the ``annotate`` keyword to each format converter
    :return: the first passage read; when nothing is read, the (empty) result of
             ioutil.read_files_and_dirs itself
    """
    WIKIFIER.enabled = False
    annotating_converters = {}
    for fmt, convert in FROM_FORMAT.items():
        annotating_converters[fmt] = partial(convert, annotate=annotate)
    loaded = ioutil.read_files_and_dirs(filename, converters=annotating_converters, attempts=1, delay=0)
    # With nothing to yield, hand back the iterable itself instead of raising StopIteration
    return next(iter(loaded), loaded)
pass return 0 def get_eval_type(scores): return UNLABELED if Config().is_unlabeled(scores.format) else LABELED # Marks input passages as text so that we don't accidentally train on them def from_text_format(*args, **kwargs): for passage in from_text(*args, **kwargs): passage.extra["format"] = "text" yield passage CONVERTERS = {k: partial(c, annotate=True) for k, c in FROM_FORMAT.items()} CONVERTERS[""] = CONVERTERS["txt"] = from_text_format def read_passages(args, files): expanded = [f for pattern in files for f in sorted(glob(pattern)) or (pattern,)] return ioutil.read_files_and_dirs(expanded, sentences=args.sentences, paragraphs=args.paragraphs, converters=CONVERTERS, lang=Config().args.lang) def filter_passages_for_bert(passages, args): from pytorch_pretrained_bert import BertTokenizer is_uncased_model = "uncased" in args.bert_model tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=is_uncased_model) for passage in passages: text = [node.text for node in passage.layer(layer0.LAYER_ID).all]
pass return 0 def get_eval_type(scores): return UNLABELED if Config().is_unlabeled(scores.format) else LABELED # Marks input passages as text so that we don't accidentally train on them def from_text_format(*args, **kwargs): for passage in from_text(*args, **kwargs): passage.extra["format"] = "text" yield passage CONVERTERS = {k: partial(c, annotate=True) for k, c in FROM_FORMAT.items()} CONVERTERS[""] = CONVERTERS["txt"] = from_text_format def read_passages(args, files): expanded = [f for pattern in files for f in sorted(glob(pattern)) or (pattern,)] return ioutil.read_files_and_dirs(expanded, sentences=args.sentences, paragraphs=args.paragraphs, converters=CONVERTERS, lang=Config().args.lang) # noinspection PyTypeChecker,PyStringFormat def main_generator(): args = Config().args assert args.passages or args.train, "Either passages or --train is required (use -h for help)" assert args.models or args.train or args.folds, "Either --model or --train or --folds is required" assert not (args.train or args.dev) or not args.folds, "--train and --dev are incompatible with --folds"
def open_out_file(spec, name=None): if spec.join: filename = spec.join if not filename.endswith(".conllu"): filename += ".conllu" return open(os.path.join(spec.out_dir, filename), "a", encoding="utf-8") return open(os.path.join(spec.out_dir, name + ".conllu"), "w", encoding="utf-8") CONVERTERS = { f: lambda l: (to_conllu_native(p) for p in c(l, passage_id=None)) for f, c in FROM_FORMAT.items() } CONVERTERS[ "conllu"] = split_by_empty_lines # If getting CoNLL-U as input, don't bother converting just to convert back def main(args): for spec in read_specs(args, converters=CONVERTERS): scores = [] sentences, to_parse = tee( (to_conllu_native(p), to_conllu_native(p, test=True, enhanced=False) ) if isinstance(p, core.Passage) else (p, strip_enhanced(p)) for p in spec.passages) t = tqdm(zip((x for x, _ in sentences), split_by_empty_lines(
# NOTE(review): this chunk starts inside a call's argument list (the "--label-map" option) and stops
# on a dangling `else:`, so the enclosing definition is incomplete here and the code is left untouched.
# Visible behaviour: register the remaining options and parse the arguments; rebind FROM_FORMAT so that
# each converter is a partial carrying every parsed argument as a keyword; create args.out_dir when it
# is set; then, for each passage, apply map_labels with args.label_map, normalize it when
# args.normalize is set, and when args.tikz is set build the tikz code and, if an output directory
# was given, print it to <out_dir>/<passage.ID>.tikz.txt.
# NOTE(review): the description given for "node-ids" ("print tikz code rather than showing plots")
# reads like the text of the tikz option; here args.node_ids is only passed as node_ids= to
# visualization.tikz -- confirm the intended wording.
"--label-map", help= "CSV file specifying mapping of input edge labels to output edge labels" ) add_boolean_option(argparser, "node-ids", "print tikz code rather than showing plots", short="i") argparser.add_argument("-f", "--format", choices=("png", "svg"), default="png", help="image format") args = argparser.parse_args() FROM_FORMAT = {k: partial(v, **vars(args)) for k, v in FROM_FORMAT.items()} if args.out_dir: os.makedirs(args.out_dir, exist_ok=True) for passage in get_passages_with_progress_bar(args.passages, desc="Visualizing", converters=FROM_FORMAT): map_labels(passage, args.label_map) if args.normalize: normalize(passage, extra=args.extra_normalization) if args.tikz: tikz = visualization.tikz(passage, node_ids=args.node_ids) if args.out_dir: with open(os.path.join(args.out_dir, passage.ID + ".tikz.txt"), "w") as f: print(tikz, file=f) else: