def main(args):
    """
    Subcommand main: rename an annotation throughout the selected corpus.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Exits early (via `sys.exit`) when `--stage` and `--annotator` are
    combined inconsistently. Otherwise every document in the corpus is
    passed through `_rename_in_doc` with `args.source` and the computed
    target, then saved to the output directory.
    """
    stage = args.stage
    if stage:
        unannotated = stage == 'unannotated'
        if not unannotated and not args.annotator:
            sys.exit("--annotator is required unless --stage is unannotated")
        if unannotated and args.annotator:
            sys.exit("--annotator is forbidden if --stage is unannotated")

    output_dir = get_output_dir(args, default_overwrite=True)
    corpus = read_corpus(args, verbose=True)

    old_id = args.source
    new_id = _get_target(args, old_id, corpus)
    for key in corpus:
        print(key)
        document = corpus[key]
        _rename_in_doc(old_id, new_id, document)
        save_document(output_dir, key, document)

    # summary goes to stderr
    summary = "Renamed from %s to %s" % (anno_id_from_tuple(old_id),
                                         anno_id_from_tuple(new_id))
    print(summary, file=sys.stderr)
    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: split a dialogue at the turn given by `args.turn`.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    For each document in the selected corpus this calls `_split_dialogue`,
    prints a small diff of the document before/after to stderr, and saves
    the document to the output directory. Unless `args.no_commit_msg` is
    set, a suggested commit message for the last processed document is
    printed to stdout at the end.
    """
    corpus = read_corpus(args, verbose=True)
    tcache = TimestampCache()
    output_dir = get_output_dir(args, default_overwrite=True)

    # Must exist even when the corpus is empty; otherwise the check after
    # the loop would fail on an unbound local.
    commit_info = None
    for key in corpus:
        print(key)
        new_doc = corpus[key]
        # snapshot taken before the split so that it can be diffed against
        # (presumably _split_dialogue modifies new_doc in place)
        old_doc = copy.deepcopy(new_doc)
        span = _split_dialogue(tcache, new_doc, args.turn)
        diffs = _mini_diff(key, args, old_doc, new_doc, span)
        # NOTE(review): on Python 3 this prints the repr of a bytes object;
        # left untouched in case the file targets Python 2 -- confirm
        print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        save_document(output_dir, key, new_doc)
        # for commit message generation (the last document wins)
        commit_info = CommitInfo(key=key,
                                 before=old_doc,
                                 after=new_doc,
                                 span=span,
                                 tid=args.turn)
    announce_output_dir(output_dir)
    if commit_info and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(commit_info))
def main(args):
    """
    Subcommand main: read the corpus and write it back out.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    When `args.diff_friendly` is set, the units, relations and schemas of
    each document are first replaced by the result of `_diff_friendly`.
    """
    corpus = read_corpus(args, verbose=True)
    output_dir = get_output_dir(args, default_overwrite=True)
    for key in corpus:
        document = corpus[key]
        if args.diff_friendly:
            # same three collections, same order as they are stored
            for field in ("units", "relations", "schemas"):
                setattr(document, field,
                        _diff_friendly(getattr(document, field)))
        save_document(output_dir, key, document)
    announce_output_dir(output_dir)
def main():
    """
    Create a .seg file for every file in the corpus.

    Arguments come from `mk_argparser().parse_args()`. If `args.pipeline`
    is set, the individual emission options on `args` are overwritten
    with a fixed preset before the `Config` is built.
    """
    args = mk_argparser().parse_args()
    corpus = read_corpus(args)
    output_dir = get_output_dir(args)

    if args.pipeline:
        # pipeline mode pins every option, whatever was on the command line
        preset = (
            ("resources", True),
            ("resource_status", False),
            ("dialogue_acts", False),
            ("dialogue_boundaries", False),
            ("fake_turn_ids", True),
        )
        for option, value in preset:
            setattr(args, option, value)

    config = Config(
        emit_resources=args.resources,
        emit_resource_status=args.resource_status,
        emit_dialogue_acts=args.dialogue_acts,
        emit_dialogue_boundaries=args.dialogue_boundaries,
        fake_turn_ids=args.fake_turn_ids,
    )
    for key in corpus:
        process_document(config, corpus, key, output_dir)
    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: drop schemas that have no members.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Only the 'discourse' and 'units' stages are read. Each document is
    saved to the output directory after its memberless schemas have been
    removed.
    """
    stages = {'stage': ['discourse', 'units']}
    corpus = read_corpus(args, preselected=stages)
    output_dir = get_output_dir(args, default_overwrite=True)
    for key in corpus:
        document = corpus[key]
        # collect first, remove afterwards: the collection must not be
        # modified while it is being iterated
        memberless = [schema for schema in document.schemas
                      if not schema.members]
        for schema in memberless:
            document.schemas.remove(schema)
        save_document(output_dir, key, document)
    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: rename EDU types according to `RENAMES`.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Only the 'units' stage is read. For every EDU, each component of its
    type (as split by `educe.stac.split_type`) is looked up in `RENAMES`;
    if anything changed, the type is rewritten as the sorted components
    joined with "/". Every document is saved to the output directory.
    """
    corpus = read_corpus(args, preselected={"stage": ["units"]})
    output_dir = get_output_dir(args, default_overwrite=True)
    for key in corpus:
        document = corpus[key]
        edus = [unit for unit in document.units if educe.stac.is_edu(unit)]
        for edu in edus:
            current = frozenset(educe.stac.split_type(edu))
            # labels without an entry in RENAMES are kept unchanged
            renamed = frozenset(RENAMES.get(label, label)
                                for label in current)
            if renamed == current:
                continue
            edu.type = "/".join(sorted(renamed))
        save_document(output_dir, key, document)
    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: nudge the boundaries of annotations on `args.span`.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    The new span is `args.span` with `args.nudge_start` added to its start
    and `args.nudge_end` added to its end. In a copy of each document,
    every unit whose span equals the old span is given the new one; a
    small diff (or a warning if nothing matched) is printed to stderr and
    the copy is saved to the output directory. Unless `args.no_commit_msg`
    is set, a suggested commit message for the last processed document is
    printed to stdout.
    """
    _screen_args(args)
    corpus = read_corpus(args, verbose=True)
    output_dir = get_output_dir(args, default_overwrite=True)
    old_span = args.span
    new_span = Span(old_span.char_start + args.nudge_start,
                    old_span.char_end + args.nudge_end)

    # Must exist even when the corpus is empty; otherwise the check after
    # the loop would fail on an unbound local.
    commit_info = None
    for k in corpus:
        old_doc = corpus[k]
        # work on a copy so the original stays available for the diff
        new_doc = copy.deepcopy(old_doc)
        found = False
        for anno in new_doc.units:
            if anno.span == old_span:
                # each annotation gets its own span object
                anno.span = copy.deepcopy(new_span)
                found = True
        if found:
            diffs = _mini_diff(k, (old_doc, old_span), (new_doc, new_span))
            # NOTE(review): on Python 3 this prints the repr of a bytes
            # object; left untouched in case the file targets Python 2
            print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        else:
            print("WARNING: No annotations found for %s in %s" % (old_span, k),
                  file=sys.stderr)
        save_document(output_dir, k, new_doc)
        # for commit message generation (the last document wins)
        span = old_span.merge(new_span)
        commit_info = CommitInfo(key=k,
                                 before=old_doc,
                                 after=new_doc,
                                 span=span)
    if commit_info and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(commit_info))
    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: delete an annotation throughout the selected corpus.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Exits early (via `sys.exit`) when `--stage` and `--annotator` are
    combined inconsistently. Otherwise `_delete_in_doc` is applied with
    `args.anno_id` to every document, which is then saved to the output
    directory.
    """
    stage = args.stage
    if stage:
        unannotated = stage == 'unannotated'
        if not unannotated and not args.annotator:
            sys.exit("--annotator is required unless --stage is unannotated")
        if unannotated and args.annotator:
            sys.exit("--annotator is forbidden if --stage is unannotated")

    output_dir = get_output_dir(args, default_overwrite=True)
    corpus = read_corpus(args, verbose=True)
    for doc_key in corpus:
        print(doc_key)
        document = corpus[doc_key]
        _delete_in_doc(args.anno_id, document)
        save_document(output_dir, doc_key, document)

    # summary goes to stderr
    print("Deleted %s" % anno_id_from_tuple(args.anno_id), file=sys.stderr)
    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: merge several dialogues into one.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    The dialogues to merge are either given directly (`args.dialogues`)
    or looked up from a pair of turns (`args.turns`); in both cases at
    least two are required, otherwise the program exits. Every document is
    passed through `_merge_dialogues_in_document` and saved. Unless
    `args.no_commit_msg` is set, a commit message computed *before* the
    merge is printed at the end.
    """
    turns_given = args.turns
    if not turns_given and len(args.dialogues) < 2:
        sys.exit("Must specify at least two dialogues")

    output_dir = get_output_dir(args, default_overwrite=True)
    corpus = read_corpus(args, verbose=True)

    if not args.turns:
        sought = args.dialogues
    else:
        try:
            first_turn, last_turn = args.turns[0], args.turns[1]
            sought = _dialogues_in_turns(corpus, first_turn, last_turn)
            if len(sought) < 2:
                sys.exit("Must specify at least two dialogues")
            pretty_ids = ", ".join(anno_id_from_tuple(x) for x in sought)
            print("Merging dialogues: " + pretty_ids, file=sys.stderr)
        except GlozzException as oops:
            sys.exit(str(oops))

    if corpus and not args.no_commit_msg:
        first_key = next(iter(corpus))
        # compute this before we change things
        cmsg = commit_msg(args, corpus, first_key, sought)

    for key in corpus:
        document = corpus[key]
        _merge_dialogues_in_document(sought, document)
        save_document(output_dir, key, document)
    announce_output_dir(output_dir)

    if corpus and not args.no_commit_msg:
        print("-----8<------")
        print(cmsg)