Exemple #1
0
def main(args):
    """Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    For every requested document of `args.corpus`, replace the text at
    `args.span` with `args.sub_text`, print a diff of the document on
    stderr and hand the new document to `save_document`.  Unless
    `args.no_commit_msg` is set, a suggested commit message built from
    the first processed document is then printed.  When no document was
    processed there is nothing to describe, so no commit message is
    printed.
    """
    output_dir = get_output_dir(args, default_overwrite=True)

    # locate insertion site: target document
    reader = educe.stac.Reader(args.corpus)
    tgt_files = reader.filter(reader.files(), is_requested(args))
    tgt_corpus = reader.slurp(tgt_files)

    # TODO mark units with FIXME, optionally delete in/out relations
    span = args.span
    sub_text = args.sub_text
    minor = args.minor
    # (key, annotation before, annotation after) for the first document
    # processed; the commit message only ever uses this one
    first_result = None
    for tgt_k, tgt_doc in tgt_corpus.items():
        anno_before = annotate_doc(tgt_doc, span=span)
        # process
        new_tgt_doc = replace_text_at_span(
            tgt_doc, span, sub_text, minor=minor)
        # WIP new_span, depends on the offset: the start of the span is
        # kept, its end moves by the difference in text length
        offset = len(sub_text) - (span.char_end - span.char_start)
        new_span = Span(span.char_start, span.char_end + offset)
        # end WIP
        anno_after = annotate_doc(new_tgt_doc, span=new_span)
        if first_result is None:
            first_result = (tgt_k, anno_before, anno_after)
        # show diff and save doc
        diffs = [
            "======= REPLACE TEXT IN %s   ========" % tgt_k,
            show_diff(tgt_doc, new_tgt_doc),
        ]
        # NOTE(review): on Python 3, print() of the encoded bytes shows
        # their repr (b'...') -- confirm the target interpreter
        print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        save_document(output_dir, tgt_k, new_tgt_doc)
    announce_output_dir(output_dir)
    # commit message
    if first_result is None:
        # empty corpus: indexing the first item would raise IndexError
        return
    tgt_k, anno_str_before, anno_str_after = first_result
    if tgt_k and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(tgt_k, anno_str_before, anno_str_after))
Exemple #2
0
    def anno(doc, prefix, tspan):
        """Pad text segment as needed.

        Return `prefix`, followed by "..." when `tspan` (shifted by the
        length of `prefix`) starts before `info.span`, followed by the
        annotated text of `doc` running from the start of `info.span`
        to the end of `tspan`.
        """
        if tspan.char_start + len(prefix) < info.span.char_start:
            lead = "..."
        else:
            lead = ""
        # annotate from the focus span's start up to the end of tspan
        window = Span(info.span.char_start, tspan.char_end)
        body = annotate_doc(doc, span=window)
        return "".join((prefix, lead, body))
Exemple #3
0
def main(args):
    """Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Each requested document of `args.corpus` has the text at `args.span`
    replaced by `args.sub_text`; a diff is printed on stderr and the new
    document is passed to `save_document`.  Afterwards, unless
    `args.no_commit_msg` is set, a suggested commit message based on the
    first processed document is printed.  If the corpus turned out to be
    empty, the commit message is skipped instead of failing.
    """
    output_dir = get_output_dir(args, default_overwrite=True)

    # locate insertion site: target document
    reader = educe.stac.Reader(args.corpus)
    tgt_files = reader.filter(reader.files(), is_requested(args))
    tgt_corpus = reader.slurp(tgt_files)

    # TODO mark units with FIXME, optionally delete in/out relations
    span = args.span
    sub_text = args.sub_text
    minor = args.minor
    # key and before/after annotations of the first processed document,
    # which is the only one the commit message reports on
    first_result = None
    for tgt_k, tgt_doc in tgt_corpus.items():
        anno_before = annotate_doc(tgt_doc, span=span)
        # process
        new_tgt_doc = replace_text_at_span(
            tgt_doc, span, sub_text, minor=minor)
        # WIP new_span, depends on the offset: same start, end shifted by
        # the change in length between the old and the new text
        offset = len(sub_text) - (span.char_end - span.char_start)
        new_span = Span(span.char_start, span.char_end + offset)
        # end WIP
        anno_after = annotate_doc(new_tgt_doc, span=new_span)
        if first_result is None:
            first_result = (tgt_k, anno_before, anno_after)
        # show diff and save doc
        diffs = ["======= REPLACE TEXT IN %s   ========" % tgt_k,
                 show_diff(tgt_doc, new_tgt_doc)]
        # NOTE(review): on Python 3, print() of the encoded bytes shows
        # their repr (b'...') -- confirm the target interpreter
        print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        save_document(output_dir, tgt_k, new_tgt_doc)
    announce_output_dir(output_dir)
    # commit message
    if first_result is None:
        # no document was processed; taking item [0] would raise IndexError
        return
    tgt_k, anno_str_before, anno_str_after = first_result
    if tgt_k and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(tgt_k, anno_str_before, anno_str_after))
Exemple #4
0
    def anno(doc, prefix, tspan):
        """Pad text segment as needed.

        The result is `prefix`, then a "..." marker if `tspan` (offset by
        the length of `prefix`) begins before `info.span`, then the
        annotated text of `doc` between the start of `info.span` and the
        end of `tspan`.
        """
        starts_earlier = (tspan.char_start + len(prefix)
                          < info.span.char_start)
        marker = "..." if starts_earlier else ""
        shown = Span(info.span.char_start, tspan.char_end)
        pieces = (prefix, marker, annotate_doc(doc, span=shown))
        return "".join(pieces)
Exemple #5
0
    def anno(doc, prefix, tspan):
        """Pad text segment as needed.

        Return `prefix`, a leading "..." when `tspan` (shifted by the
        length of `prefix`) starts before `info.span`, the annotated text
        of `doc` restricted to `info.span`, and a trailing "..." when
        `tspan` ends more than one character past `info.span`.
        """
        if tspan.char_start + len(prefix) < info.span.char_start:
            lead = "..."
        else:
            lead = ""
        if tspan.char_end > info.span.char_end + 1:
            trail = "..."
        else:
            trail = ""
        body = annotate_doc(doc, span=info.span)
        return "".join((prefix, lead, body, trail))
Exemple #6
0
    def anno(doc, prefix, tspan):
        """Pad text segment as needed.

        The annotated text of `doc` at `info.span` is preceded by
        `prefix` and wrapped in "..." markers: one in front if `tspan`
        (offset by the length of `prefix`) begins before `info.span`, one
        behind if `tspan` ends more than one character after it.
        """
        cut_before = tspan.char_start + len(prefix) < info.span.char_start
        cut_after = tspan.char_end > info.span.char_end + 1
        opening = "..." if cut_before else ""
        closing = "..." if cut_after else ""
        pieces = (prefix, opening, annotate_doc(doc, span=info.span), closing)
        return "".join(pieces)
Exemple #7
0
def commit_msg(info):
    """
    Generate a commit message describing the operation
    we just did

    The message consists of a title naming the document, subdocument and
    turn id taken from `info`, an empty line, the annotated rendering of
    `info.after` narrowed to `info.span`, and a closing "..." line.
    """
    key = info.key
    excerpt = narrow_to_span(info.after, info.span)

    title = "{}_{}: split dialogue before turn {}".format(
        key.doc, key.subdoc, info.tid)
    rendering = annotate_doc(excerpt)
    return "\n".join((title, "", rendering, "..."))
Exemple #8
0
def commit_msg(info):
    """
    Generate a commit message describing the operation
    we just did

    Lines of the message, in order: a title built from `info.key` and
    `info.tid`, a blank line, the annotated view of `info.after`
    restricted to `info.span`, and "...".
    """
    doc_key = info.key
    narrowed = narrow_to_span(info.after, info.span)

    parts = []
    parts.append("{}_{}: split dialogue before turn {}".format(
        doc_key.doc, doc_key.subdoc, info.tid))
    parts.append("")
    parts.append(annotate_doc(narrowed))
    parts.append("...")
    return "\n".join(parts)
Exemple #9
0
def commit_msg(info):
    """
    Generate a commit message describing the operation
    we just did

    The message is a title giving the document, subdocument, turn id and
    direction taken from `info`, followed by an empty line, the annotated
    rendering of `info.after` narrowed to `info.span`, and a "..." line.
    """
    key = info.key
    excerpt = narrow_to_span(info.after, info.span)

    title_fields = (key.doc, key.subdoc, info.tid, info.direction)
    title = "%s_%s: move turn %d %s" % title_fields
    rendering = annotate_doc(excerpt)
    return "\n".join((title, "", rendering, "..."))
Exemple #10
0
def commit_msg(args, corpus, k, sought):
    """
    Generate a commit message describing the dialogue merging operation
    we are about to do (has to be run before merging happens)

    `k` selects the document in `corpus`; `sought` lists the dialogues
    to look up among that document's units.  If none are looked up, a
    fixed "nothing to merge" notice is returned instead of a message.
    """
    target_doc = corpus[k]
    id_list = ", ".join(anno_id_from_tuple(x) for x in sought)
    found = [_get_annotation_with_id(d, target_doc.units) for d in sought]
    if not found:
        return "(no commit message; nothing to merge)"

    # the title mentions the turn range only when one was given
    if args.turns:
        hint = " (turns %d-%d)" % tuple(args.turns)
    else:
        hint = ""
    merged_span = _merge_spans(found)
    title = u"{doc}_{subdoc}: merge dialogues{hint}".format(
        doc=k.doc, subdoc=k.subdoc, hint=hint)
    heading = "Dialogues ({}), was:".format(id_list)
    rendering = annotate_doc(target_doc, span=merged_span)
    return "\n".join([title, "", heading, "", rendering])
Exemple #11
0
def commit_msg(args, corpus, k, sought):
    """
    Generate a commit message describing the dialogue merging operation
    we are about to do (has to be run before merging happens)

    The document is `corpus[k]` and the dialogues are looked up in its
    units from the entries of `sought`.  When that lookup yields
    nothing, a fixed "nothing to merge" notice is returned.
    """
    doc = corpus[k]
    ids_text = ", ".join(anno_id_from_tuple(x) for x in sought)
    to_merge = [_get_annotation_with_id(d, doc.units) for d in sought]
    if not to_merge:
        return "(no commit message; nothing to merge)"

    title_template = u"{doc}_{subdoc}: merge dialogues{hint}"
    # optional turn range appended to the title
    turns_hint = ""
    if args.turns:
        turns_hint = " (turns %d-%d)" % tuple(args.turns)
    covered = _merge_spans(to_merge)

    message = []
    message.append(title_template.format(
        doc=k.doc, subdoc=k.subdoc, hint=turns_hint))
    message.append("")
    message.append("Dialogues ({}), was:".format(ids_text))
    message.append("")
    message.append(annotate_doc(doc, span=covered))
    return "\n".join(message)