コード例 #1
0
ファイル: clean_emoticons.py プロジェクト: kowey/educe
def turns_with_final_emoticons(doc, tags):
    """
    Return a tuple of two lists of turns ending in an emoticon-only EDU.

    Both lists contain the turns in a document that end with the
    pattern EDU emoticon-only-EDU.

    The first (main) list contains those whose final EDU is not
    pointed to by any relations or schema (as reported by
    `has_links`). The second (warnings only) list contains those
    whose final EDU does have relations or schema pointing to it.

    The reason we distinguish between the two lists is that we don't
    want to touch those in the latter (out of conservatism, the idea
    of removing these from their relations, CDUs seems scary), but we
    want to know about them.

    Turns that enclose fewer than two items are skipped: they cannot
    match the two-EDU pattern.
    """
    egraph = EnclosureGraph(doc, tags)
    affected_free_turns = []
    affected_linked_turns = []

    for turn in sorted_turns(doc):
        edus = sorted_first_widest(egraph.inside(turn))

        # Check the length *before* indexing: a turn that encloses
        # nothing would otherwise raise IndexError on edus[-1].
        if len(edus) < 2:
            continue

        last_edu = edus[-1]
        if not is_just_emoticon(egraph.inside(last_edu)):
            continue

        if has_links(doc, last_edu):
            affected_linked_turns.append(turn)
        else:
            affected_free_turns.append(turn)

    return affected_free_turns, affected_linked_turns
コード例 #2
0
def turns_with_final_emoticons(doc, tags):
    """
    Return a tuple of two lists of turns ending in an emoticon-only EDU.

    Both lists contain the turns in a document that end with the
    pattern EDU emoticon-only-EDU.

    The first (main) list contains those whose final EDU is not
    pointed to by any relations or schema (as reported by
    `has_links`). The second (warnings only) list contains those
    whose final EDU does have relations or schema pointing to it.

    The reason we distinguish between the two lists is that we don't
    want to touch those in the latter (out of conservatism, the idea
    of removing these from their relations, CDUs seems scary), but we
    want to know about them.

    Turns that enclose fewer than two items are skipped: they cannot
    match the two-EDU pattern.
    """
    egraph = EnclosureGraph(doc, tags)
    affected_free_turns = []
    affected_linked_turns = []

    for turn in sorted_turns(doc):
        edus = sorted_first_widest(egraph.inside(turn))

        # Check the length *before* indexing: a turn that encloses
        # nothing would otherwise raise IndexError on edus[-1].
        if len(edus) < 2:
            continue

        last_edu = edus[-1]
        if not is_just_emoticon(egraph.inside(last_edu)):
            continue

        if has_links(doc, last_edu):
            affected_linked_turns.append(turn)
        else:
            affected_free_turns.append(turn)

    return affected_free_turns, affected_linked_turns
コード例 #3
0
ファイル: clean_emoticons.py プロジェクト: kowey/educe
def merge_final_emoticons(tcache, turn_spans, doc, tags):
    """
    Fold the trailing emoticon EDU of selected turns into its neighbour.

    Given a timestamp cache, a collection of text spans identifying
    turns with final emoticons in them, and a document, for every turn
    whose text span appears in `turn_spans`:

    1. look up the turn's entry in the timestamp cache
    2. absorb the last enclosed EDU into the one just before it
    3. remove that last EDU from the document's units

    The document is modified in place; nothing is returned.
    """
    enclosure = EnclosureGraph(doc, tags)
    for candidate in sorted_turns(doc):
        if candidate.text_span() in turn_spans:
            inner = sorted_first_widest(enclosure.inside(candidate))
            # selected turns are expected to hold at least two items
            assert len(inner) > 1

            timestamp = tcache.get(educe.stac.turn_id(candidate))
            emoticon_edu = inner[-1]
            host_edu = inner[-2]
            absorb_emoticon(doc, timestamp, host_edu, emoticon_edu)
            doc.units.remove(emoticon_edu)
コード例 #4
0
def merge_final_emoticons(tcache, turn_spans, doc, tags):
    """
    Fold the trailing emoticon EDU of selected turns into its neighbour.

    Given a timestamp cache, a collection of text spans identifying
    turns with final emoticons in them, and a document, for every turn
    whose text span appears in `turn_spans`:

    1. look up the turn's entry in the timestamp cache
    2. absorb the last enclosed EDU into the one just before it
    3. remove that last EDU from the document's units

    The document is modified in place; nothing is returned.
    """
    enclosure = EnclosureGraph(doc, tags)
    for candidate in sorted_turns(doc):
        if candidate.text_span() in turn_spans:
            inner = sorted_first_widest(enclosure.inside(candidate))
            # selected turns are expected to hold at least two items
            assert len(inner) > 1

            timestamp = tcache.get(educe.stac.turn_id(candidate))
            emoticon_edu = inner[-1]
            host_edu = inner[-2]
            absorb_emoticon(doc, timestamp, host_edu, emoticon_edu)
            doc.units.remove(emoticon_edu)