コード例 #1
0
ファイル: smart_select.py プロジェクト: hellp/snaked
def block_smart_extend(has_selection, start, end):
    """Return a ``(newstart, newend)`` pair that widens the range start..end.

    The choice of how to grow is driven by comparing the whitespace length
    reported by ``get_whitespace`` for the boundary lines with the one of the
    line before ``start`` and the line after ``end``, and by whether those
    neighbouring lines are empty (``line_is_empty``).

    NOTE(review): ``start`` and ``end`` look like GTK text iterators
    (``copy``, ``is_start``, ``is_end``, ``get_line``, ``backward_lines``,
    ``forward_lines``, ``equal``) -- confirm against the callers.

    :param has_selection: truthy when a selection already exists. Without a
        selection a lone line may be returned unchanged; with one, the range
        is forced to grow if none of the rules changed it.
    :param start: position of the first line of the range; it is copied
        before this function moves it.
    :param end: position of the end of the range; it is copied before this
        function moves it.
    :returns: tuple ``(newstart, newend)``.
    """
    # Work on a copy and step back one line unless already at the very end.
    # NOTE(review): presumably the incoming ``end`` sits on the line after the
    # last covered line -- confirm.
    end = end.copy()
    if not end.is_end():
        end.backward_lines(1)

    # Whitespace length of the first line and of the line before it.
    start_ws = len(get_whitespace(start))
    prev_empty = start.is_start() or line_is_empty(prev_line(start))
    prev_ws = len(get_whitespace(prev_line(start)))

    # Whitespace length of the last line and of the line after it.
    end_ws = len(get_whitespace(end))
    next_empty = end.is_end() or line_is_empty(next_line(end))
    next_ws = len(get_whitespace(next_line(end)))

    # Default result: the unchanged range (``newend`` is the local copy).
    newstart, newend = start.copy(), end

    # The branches below are tried in order; the first match wins.
    # 1) No selection, range is a single line, and each neighbour is either
    #    empty or has less whitespace: keep the range as it is.
    if not has_selection and start.get_line() == end.get_line() and \
            ( next_empty or next_ws < end_ws ) and (prev_empty or prev_ws < start_ws):
        pass
    # 2) Both neighbours are non-empty and all four whitespace lengths match:
    #    grow in both directions.
    elif not prev_empty and not next_empty and prev_ws == start_ws == next_ws == end_ws:
        newstart = extend_without_gap(start, start_ws, -1)
        newend = extend_without_gap(end, end_ws, 1)
    # 3) Nothing usable above, next line has the same whitespace: grow down.
    elif prev_empty and not next_empty and next_ws == end_ws:
        newend = extend_without_gap(end, end_ws, 1)
    # 4) Nothing usable below, previous line has the same whitespace: grow up.
    elif not prev_empty and next_empty and prev_ws == start_ws:
        newstart = extend_without_gap(start, start_ws, -1)
    # 5) Next line has more whitespace than the first line: grow down using
    #    the block-aware helper.
    elif not next_empty and next_ws > start_ws:
        newend = extend_block_without_gap(end, start_ws, 1)
    # 6) Next line matches the first line's whitespace, or the previous line
    #    has at least as much: grow towards every non-empty neighbour.
    elif ( not next_empty and next_ws == start_ws ) or ( not prev_empty and prev_ws >= start_ws ):
        if not prev_empty:
            newstart = extend_without_gap(start, start_ws, -1)
        if not next_empty:
            newend = extend_without_gap(end, start_ws, 1)
    # 7) Empty on both sides: grow in both directions with the gap-tolerant
    #    helper.
    elif next_empty and prev_empty:
        newstart = extend_with_gap(start, start_ws, -1)
        newend = extend_with_gap(end, start_ws, 1)
    # 8) Empty below and the previous line has less whitespace: grow down
    #    with the gap-tolerant helper.
    elif next_empty and not prev_empty and prev_ws < start_ws:
        newend = extend_with_gap(end, start_ws, 1)

    # A selection existed and no rule changed it: force the range to grow.
    if has_selection and start.equal(newstart) and end.equal(newend):
        if not prev_empty:
            newstart.backward_lines(1)
        else:
            # Previous line is empty (or start is the first line): jump to
            # whatever get_next_not_empty_line finds in direction -1, if any.
            ne = get_next_not_empty_line(start, -1)
            if ne:
                newstart = ne

        # Only take the following line when its stripped text is shorter than
        # 5 characters.
        # NOTE(review): presumably targets short closing lines -- confirm.
        if not next_empty and len(line_text(next_line(end)).strip()) < 5:
            newend.forward_lines(1)

    # Advance the end by one line before returning.
    # NOTE(review): presumably mirrors the backward step taken at the top.
    newend.forward_lines(1)
    return newstart, newend
コード例 #2
0
ファイル: score_factuality.py プロジェクト: cltl/clin26-eval
def read_tokens_conll(cols, f, path=''):
    """Collect token labels from selected columns of a tab-separated stream.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter; every
    other line is stripped and split on tabs, with field 0 parsed as the
    integer token number.

    :param cols: iterable of column indexes to read labels from.
    :param f: input handle passed to ``next_line``.
    :param path: unused here; kept so the signature matches the sibling
        readers.
    :returns: tuple with one set per entry of ``cols``; each set holds
        ``(sentence, token, label)`` triples for labels other than ``'_'``.
    """
    tokens = tuple(set() for _ in cols)
    sent = 1
    line = next_line(f)
    while line:
        if line == '\n':
            sent += 1
        else:
            fields = line.strip().split('\t')
            token = int(fields[0])
            for i, col in enumerate(cols):
                # Drop a trailing "-<word>" suffix from the label. The pattern
                # is a raw string so that "\w" is not an invalid str escape.
                label = re.sub(r'-\w+$', '', fields[col])
                if label != '_':
                    tokens[i].add((sent, token, label))
        line = next_line(f)
    return tokens
コード例 #3
0
def read_tokens_conll(cols, f, path=''):
    """Collect token labels from selected columns of a tab-separated stream.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter; every
    other line is stripped and split on tabs, with field 0 parsed as the
    integer token number.

    :param cols: iterable of column indexes to read labels from.
    :param f: input handle passed to ``next_line``.
    :param path: unused here; kept so the signature matches the sibling
        readers.
    :returns: tuple with one set per entry of ``cols``; each set holds
        ``(sentence, token, label)`` triples for labels other than ``'_'``.
    """
    tokens = tuple(set() for _ in cols)
    sent = 1
    line = next_line(f)
    while line:
        if line == '\n':
            sent += 1
        else:
            fields = line.strip().split('\t')
            token = int(fields[0])
            for i, col in enumerate(cols):
                # Drop a trailing "-<word>" suffix from the label. The pattern
                # is a raw string so that "\w" is not an invalid str escape.
                label = re.sub(r'-\w+$', '', fields[col])
                if label != '_':
                    tokens[i].add((sent, token, label))
        line = next_line(f)
    return tokens
コード例 #4
0
ファイル: score_factuality.py プロジェクト: cltl/clin26-eval
def read_event_spans_conll(f, path=''):
    """Read event spans from field 2 of a tab-separated stream.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter. On other
    lines, a field 2 different from ``'_'`` must look like
    ``<B|I>-E-<event id>``; the token number (field 0) is appended to the
    list of tokens collected for that event id.

    :param f: input handle passed to ``next_line``.
    :param path: file name used only in the format-error message.
    :returns: set of ``(sent, tokens)`` pairs, one per event id, where
        ``tokens`` is a tuple of token numbers.
    :raises AssertionError: when the tag type is not ``B``/``I``, the label is
        not ``E``, or a ``B``/``I`` tag appears out of order for its event id.
    :raises ValueError: when field 2 does not split into exactly three
        ``-``-separated parts or field 0 is not an integer.
    """
    spans = set()
    sent = 1
    line = next_line(f)
    id2tokens = defaultdict(list)
    while line:
        if line == '\n':
            sent += 1
        else:
            fields = line.strip().split('\t')
            if fields[2] != '_':
                type_, label, event_id = fields[2].split('-')
                assert type_ in ['I', 'B']
                assert label == 'E'
                # "B" must open a new event, "I" must continue an open one.
                assert (type_ == 'B' and len(id2tokens[event_id]) == 0) or \
                        (type_ == 'I' and len(id2tokens[event_id]) > 0), \
                        "Format error in file %s, sentence %d, token %s: %s" \
                        %(path, sent, fields[0], fields[2])
                id2tokens[event_id].append(int(fields[0]))
        line = next_line(f)
    # NOTE(review): ``sent`` here is the counter value after the whole input
    # was read, so every span carries the same number -- confirm this is
    # intended before relying on it as a per-span sentence index.
    # ``values()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for tokens in id2tokens.values():
        spans.add((sent, tuple(tokens)))
    return spans
コード例 #5
0
def read_event_spans_conll(f, path=''):
    """Read event spans from field 2 of a tab-separated stream.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter. On other
    lines, a field 2 different from ``'_'`` must look like
    ``<B|I>-E-<event id>``; the token number (field 0) is appended to the
    list of tokens collected for that event id.

    :param f: input handle passed to ``next_line``.
    :param path: file name used only in the format-error message.
    :returns: set of ``(sent, tokens)`` pairs, one per event id, where
        ``tokens`` is a tuple of token numbers.
    :raises AssertionError: when the tag type is not ``B``/``I``, the label is
        not ``E``, or a ``B``/``I`` tag appears out of order for its event id.
    :raises ValueError: when field 2 does not split into exactly three
        ``-``-separated parts or field 0 is not an integer.
    """
    spans = set()
    sent = 1
    line = next_line(f)
    id2tokens = defaultdict(list)
    while line:
        if line == '\n':
            sent += 1
        else:
            fields = line.strip().split('\t')
            if fields[2] != '_':
                type_, label, event_id = fields[2].split('-')
                assert type_ in ['I', 'B']
                assert label == 'E'
                # "B" must open a new event, "I" must continue an open one.
                assert (type_ == 'B' and len(id2tokens[event_id]) == 0) or \
                        (type_ == 'I' and len(id2tokens[event_id]) > 0), \
                        "Format error in file %s, sentence %d, token %s: %s" \
                        %(path, sent, fields[0], fields[2])
                id2tokens[event_id].append(int(fields[0]))
        line = next_line(f)
    # NOTE(review): ``sent`` here is the counter value after the whole input
    # was read, so every span carries the same number -- confirm this is
    # intended before relying on it as a per-span sentence index.
    # ``values()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for tokens in id2tokens.values():
        spans.add((sent, tuple(tokens)))
    return spans
コード例 #6
0
ファイル: score_factuality.py プロジェクト: cltl/clin26-eval
def read_generic_spans_conll(col, f, path):
    """Read labelled spans from one column of a tab-separated stream.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter. On other
    lines, a value in column ``col`` different from ``'_'`` must look like
    ``<B|I>-<label>-<id>``; the token number (field 0) is appended to the
    tokens collected for that ``(label, id)`` pair.

    :param col: index of the column holding the span tags.
    :param f: input handle passed to ``next_line``.
    :param path: file name used only in the format-error message.
    :returns: set of ``(sent, tokens, label)`` triples, one per
        ``(label, id)`` pair, where ``tokens`` is a tuple of token numbers.
    :raises AssertionError: when the tag type is not ``B``/``I`` or a
        ``B``/``I`` tag appears out of order for its ``(label, id)`` pair.
    :raises ValueError: when the tag does not split into exactly three
        ``-``-separated parts or field 0 is not an integer.
    """
    spans = set()
    id2tokens = defaultdict(list)
    sent = 1
    line = next_line(f)
    while line:
        if line == '\n':
            sent += 1
        else:
            fields = line.strip().split('\t')
            token = int(fields[0])
            label = fields[col]
            if label != '_':
                type_, label, id_ = label.split('-')
                assert type_ in ['I', 'B']
                # "B" must open a new span, "I" must continue an open one.
                assert (type_ == 'B' and len(id2tokens[(label, id_)]) == 0) or \
                        (type_ == 'I' and len(id2tokens[(label, id_)]) > 0), \
                        "Format error in file %s, sentence %d, token %s: %s" \
                        %(path, sent, fields[0], fields[col])
                id2tokens[(label, id_)].append(token)
        line = next_line(f)
    # NOTE(review): ``sent`` here is the counter value after the whole input
    # was read, so every span carries the same number -- confirm this is
    # intended before relying on it as a per-span sentence index.
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for (label, _), tokens in id2tokens.items():
        spans.add((sent, tuple(tokens), label))
    return spans
コード例 #7
0
def read_generic_spans_conll(col, f, path):
    """Read labelled spans from one column of a tab-separated stream.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter. On other
    lines, a value in column ``col`` different from ``'_'`` must look like
    ``<B|I>-<label>-<id>``; the token number (field 0) is appended to the
    tokens collected for that ``(label, id)`` pair.

    :param col: index of the column holding the span tags.
    :param f: input handle passed to ``next_line``.
    :param path: file name used only in the format-error message.
    :returns: set of ``(sent, tokens, label)`` triples, one per
        ``(label, id)`` pair, where ``tokens`` is a tuple of token numbers.
    :raises AssertionError: when the tag type is not ``B``/``I`` or a
        ``B``/``I`` tag appears out of order for its ``(label, id)`` pair.
    :raises ValueError: when the tag does not split into exactly three
        ``-``-separated parts or field 0 is not an integer.
    """
    spans = set()
    id2tokens = defaultdict(list)
    sent = 1
    line = next_line(f)
    while line:
        if line == '\n':
            sent += 1
        else:
            fields = line.strip().split('\t')
            token = int(fields[0])
            label = fields[col]
            if label != '_':
                type_, label, id_ = label.split('-')
                assert type_ in ['I', 'B']
                # "B" must open a new span, "I" must continue an open one.
                assert (type_ == 'B' and len(id2tokens[(label, id_)]) == 0) or \
                        (type_ == 'I' and len(id2tokens[(label, id_)]) > 0), \
                        "Format error in file %s, sentence %d, token %s: %s" \
                        %(path, sent, fields[0], fields[col])
                id2tokens[(label, id_)].append(token)
        line = next_line(f)
    # NOTE(review): ``sent`` here is the counter value after the whole input
    # was read, so every span carries the same number -- confirm this is
    # intended before relying on it as a per-span sentence index.
    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for (label, _), tokens in id2tokens.items():
        spans.add((sent, tuple(tokens), label))
    return spans
コード例 #8
0
def read_spans_conll(f, path=''):
    """Group consecutive tokens that share the same value in field 3.

    Lines are pulled from ``f`` through ``next_line`` until it returns a falsy
    value. A line equal to ``'\\n'`` increments the sentence counter. A line
    whose field 3 is not ``'_'`` opens a span that runs over the following
    lines for as long as field 3 keeps the same value.

    :param f: input handle passed to ``next_line``.
    :param path: not used by this function.
    :returns: set of ``(sentence, first_token, last_token, value)`` tuples,
        where the token numbers come from field 0.
    """
    collected = set()
    sentence_no = 1
    current = next_line(f)
    while current:
        if current == '\n':
            sentence_no += 1
            current = next_line(f)
            continue

        columns = current.strip().split('\t')
        first_token = int(columns[0])
        value = columns[3]

        # Unannotated token: nothing to record, move on.
        if value == '_':
            current = next_line(f)
            continue

        # Consume lines (starting with the current one) while field 3 still
        # carries the same value; the line that breaks the run is left in
        # ``current`` for the outer loop to handle.
        last_token = first_token
        while True:
            columns = current.strip().split('\t')
            if not current or current == '\n' or columns[3] != value:
                break
            last_token = int(columns[0])
            current = next_line(f)
        collected.add((sentence_no, first_token, last_token, value))
    return collected