def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    For every requested document in `args.corpus`, replace the text at
    `args.span` with `args.sub_text`, print a diff of the document to
    stderr and save the modified document to the output directory.
    Unless `args.no_commit_msg` is set, a suggested commit message built
    from the first processed document is then printed to stdout.

    If no document matches the request, nothing is rewritten and no
    commit message is printed.
    """
    output_dir = get_output_dir(args, default_overwrite=True)

    # locate insertion site: target document
    reader = educe.stac.Reader(args.corpus)
    tgt_files = reader.filter(reader.files(), is_requested(args))
    tgt_corpus = reader.slurp(tgt_files)
    # materialise once: reused below to pick the first document
    tgt_items = list(tgt_corpus.items())

    # TODO mark units with FIXME, optionally delete in/out relations
    span = args.span
    sub_text = args.sub_text
    minor = args.minor

    # store before/after
    annos_before = []
    annos_after = []

    for tgt_k, tgt_doc in tgt_items:
        annos_before.append(annotate_doc(tgt_doc, span=span))
        # process
        new_tgt_doc = replace_text_at_span(
            tgt_doc, span, sub_text, minor=minor)
        # WIP new_span, depends on the offset
        # (the end of the span shifts by the change in text length)
        offset = len(sub_text) - (span.char_end - span.char_start)
        new_span = Span(span.char_start, span.char_end + offset)
        # end WIP
        annos_after.append(annotate_doc(new_tgt_doc, span=new_span))

        # show diff and save doc
        diffs = [
            "======= REPLACE TEXT IN %s ========" % tgt_k,
            show_diff(tgt_doc, new_tgt_doc),
        ]
        print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        save_document(output_dir, tgt_k, new_tgt_doc)

    announce_output_dir(output_dir)

    # commit message: only the first document is described.  Bail out
    # when nothing was processed instead of indexing into empty lists,
    # which used to raise IndexError before the guard below could run.
    if not tgt_items:
        return
    tgt_k = tgt_items[0][0]
    if tgt_k and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(tgt_k, annos_before[0], annos_after[0]))
def anno(doc, prefix, tspan):
    """
    Pad text segment as needed.

    Returns `prefix`, followed by "..." when
    `tspan.char_start + len(prefix)` falls before the start of
    `info.span`, followed by the annotation of `doc` over the span that
    runs from the start of `info.span` to the end of `tspan`.

    `info` is not a parameter; it is looked up in the surrounding scope.
    """
    if tspan.char_start + len(prefix) < info.span.char_start:
        lead = "..."
    else:
        lead = ""
    window = Span(info.span.char_start, tspan.char_end)
    annotated = annotate_doc(doc, span=window)
    return "".join((prefix, lead, annotated))
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Each requested document of `args.corpus` has the text at `args.span`
    replaced by `args.sub_text`; a diff is printed to stderr and the new
    document is saved under the output directory.  Afterwards, unless
    `args.no_commit_msg` is set, a suggested commit message based on the
    first processed document is printed to stdout.

    When the request matches no document at all, the function returns
    after announcing the output directory, without a commit message.
    """
    output_dir = get_output_dir(args, default_overwrite=True)

    # locate insertion site: target document
    reader = educe.stac.Reader(args.corpus)
    tgt_files = reader.filter(reader.files(), is_requested(args))
    tgt_corpus = reader.slurp(tgt_files)
    # snapshot of (key, document) pairs, also used to find the first key
    tgt_items = list(tgt_corpus.items())

    # TODO mark units with FIXME, optionally delete in/out relations
    span = args.span
    sub_text = args.sub_text
    minor = args.minor

    # store before/after
    annos_before = []
    annos_after = []

    for tgt_k, tgt_doc in tgt_items:
        annos_before.append(annotate_doc(tgt_doc, span=span))
        # process
        new_tgt_doc = replace_text_at_span(
            tgt_doc, span, sub_text, minor=minor)
        # WIP new_span, depends on the offset
        # (difference between replacement length and replaced length)
        offset = len(sub_text) - (span.char_end - span.char_start)
        new_span = Span(span.char_start, span.char_end + offset)
        # end WIP
        annos_after.append(annotate_doc(new_tgt_doc, span=new_span))

        # show diff and save doc
        diffs = ["======= REPLACE TEXT IN %s ========" % tgt_k,
                 show_diff(tgt_doc, new_tgt_doc)]
        print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        save_document(output_dir, tgt_k, new_tgt_doc)

    announce_output_dir(output_dir)

    # commit message (first document only).  With an empty corpus the
    # before/after lists are empty too, so indexing them would raise
    # IndexError; skip the message in that case.
    if not tgt_items:
        return
    tgt_k = tgt_items[0][0]
    if tgt_k and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(tgt_k, annos_before[0], annos_after[0]))
def anno(doc, prefix, tspan):
    """
    Pad text segment as needed.

    Builds `prefix` + leading marker + annotation of `doc` over
    `info.span` + trailing marker.  The leading marker is "..." when
    `tspan.char_start + len(prefix)` is before the start of `info.span`;
    the trailing marker is "..." when `tspan` ends more than one
    character past the end of `info.span`.  Either marker is otherwise
    the empty string.

    `info` is not a parameter; it is looked up in the surrounding scope.
    """
    lead = ""
    if tspan.char_start + len(prefix) < info.span.char_start:
        lead = "..."

    trail = ""
    if tspan.char_end > info.span.char_end + 1:
        trail = "..."

    annotated = annotate_doc(doc, span=info.span)
    return "".join((prefix, lead, annotated, trail))
def anno(doc, prefix, tspan):
    """
    Pad text segment as needed.

    The annotation of `doc` over `info.span` is wrapped as
    `prefix [...] <annotation> [...]`: the first "..." appears when
    `tspan.char_start + len(prefix)` lies before `info.span.char_start`,
    the second when `tspan.char_end` exceeds `info.span.char_end + 1`.

    `info` is a free name resolved from the surrounding scope.
    """
    starts_early = tspan.char_start + len(prefix) < info.span.char_start
    ends_late = tspan.char_end > info.span.char_end + 1
    text = annotate_doc(doc, span=info.span)
    pieces = [
        prefix,
        "..." if starts_early else "",
        text,
        "..." if ends_late else "",
    ]
    return "".join(pieces)
def commit_msg(info):
    """
    Generate a commit message describing the operation we just did

    The message has a title line naming the document, subdocument and
    turn id taken from `info`, a blank line, the annotation of
    `info.after` narrowed to `info.span`, and a closing "..." line.
    """
    key = info.key
    excerpt_doc = narrow_to_span(info.after, info.span)
    title = "{}_{}: split dialogue before turn {}".format(
        key.doc, key.subdoc, info.tid)
    excerpt = annotate_doc(excerpt_doc)
    return "\n".join((title, "", excerpt, "..."))
def commit_msg(info):
    """
    Generate a commit message describing the operation we just did

    Lines, in order: a title built from `info.key` and `info.tid`, an
    empty line, the annotated text of `info.after` restricted to
    `info.span`, and a trailing "..." marker.
    """
    file_id = info.key
    narrowed = narrow_to_span(info.after, info.span)

    message = []
    message.append("{}_{}: split dialogue before turn {}".format(
        file_id.doc, file_id.subdoc, info.tid))
    message.append("")
    message.append(annotate_doc(narrowed))
    message.append("...")
    return "\n".join(message)
def commit_msg(info):
    """
    Generate a commit message describing the operation we just did

    The title line reports the document, subdocument, turn id and
    direction read from `info`; it is followed by a blank line, the
    annotation of `info.after` narrowed to `info.span`, and "...".
    """
    key = info.key
    excerpt_doc = narrow_to_span(info.after, info.span)
    title_fields = (key.doc, key.subdoc, info.tid, info.direction)
    title = "%s_%s: move turn %d %s" % title_fields
    excerpt = annotate_doc(excerpt_doc)
    return "\n".join((title, "", excerpt, "..."))
def commit_msg(args, corpus, k, sought):
    """
    Generate a commit message describing the dialogue merging operation
    we are about to do (has to be run before merging happens)

    The message shows the ids in `sought` and the annotation of
    `corpus[k]` over the merged span of the matching dialogues.  When
    `args.turns` is set, the title also mentions the turn range.  If
    `sought` yields no dialogues, a fixed placeholder string is
    returned instead.
    """
    doc = corpus[k]
    dstr = ", ".join(anno_id_from_tuple(x) for x in sought)
    dialogues = [_get_annotation_with_id(d, doc.units) for d in sought]

    # nothing to describe: bail out early
    if not dialogues:
        return "(no commit message; nothing to merge)"

    title_fmt = u"{doc}_{subdoc}: merge dialogues{hint}"
    if args.turns:
        title_hint = " (turns %d-%d)" % tuple(args.turns)
    else:
        title_hint = ""
    dspan = _merge_spans(dialogues)

    title = title_fmt.format(doc=k.doc, subdoc=k.subdoc, hint=title_hint)
    was_line = "Dialogues ({}), was:".format(dstr)
    excerpt = annotate_doc(doc, span=dspan)
    return "\n".join([title, "", was_line, "", excerpt])
def commit_msg(args, corpus, k, sought):
    """
    Generate a commit message describing the dialogue merging operation
    we are about to do (has to be run before merging happens)

    Returns a multi-line string: a title for document `k` (with a turn
    range hint when `args.turns` is set), the list of ids from `sought`,
    and the annotation of the document over the span covering all the
    dialogues found.  Returns a fixed "nothing to merge" string when no
    dialogues are found.
    """
    doc = corpus[k]
    dstr = ", ".join(anno_id_from_tuple(x) for x in sought)

    dialogues = []
    for anno_id in sought:
        dialogues.append(_get_annotation_with_id(anno_id, doc.units))

    if not dialogues:
        return "(no commit message; nothing to merge)"

    title_fmt = u"{doc}_{subdoc}: merge dialogues{hint}"
    title_hint = ""
    if args.turns:
        title_hint = " (turns %d-%d)" % tuple(args.turns)
    dspan = _merge_spans(dialogues)

    message = []
    message.append(
        title_fmt.format(doc=k.doc, subdoc=k.subdoc, hint=title_hint))
    message.append("")
    message.append("Dialogues ({}), was:".format(dstr))
    message.append("")
    message.append(annotate_doc(doc, span=dspan))
    return "\n".join(message)