コード例 #1
0
ファイル: data.py プロジェクト: Qing1201/TCMSA
        def post_order(s):
            """Recursively flatten the binary S-expression tree *s* in
            post-order, appending to the enclosing-scope accumulators
            (labels, prts, childs) and returning this node's index.

            Assumes *s* is either "(label left right)" with exactly two
            child S-expressions, or a leaf "(label word)".
            """
            nonlocal index      # next node index to assign (enclosing scope)
            nonlocal leaf_cnt   # running count of leaf nodes (enclosing scope)
            # Strip the outer parentheses, then split off this node's label.
            label, phrase = s[1:-1].split(None, 1)
            leafs = sexpr.sexpr_tokenize(phrase)

            if len(leafs) == 2:
                # Internal binary node: visit both children first
                # (post-order), then record this node as each child's parent.
                lstr, rstr = leafs
                lrst = post_order(lstr)
                rrst = post_order(rstr)
                prts[lrst].append(index)
                prts[rrst].append(index)
            else:
                leaf_cnt += 1

            labels.append(label)
            prts.append([])  # parent slot for this node; filled in by its parent
            # childs.append([lrst, rrst] if len(leafs)==2 else [])
            cur = index
            # Children indices (empty for a leaf), plus this node's own index.
            childs_list = [lrst, rrst] if len(leafs) == 2 else []
            childs_list.append(cur)  # self-loop
            childs.append(childs_list)
            # print(index)
            index += 1

            return cur
コード例 #2
0
 def parse_subtree(self, s):
     """Split an S-expression into (root label, child tokens).

     The outer parentheses are stripped first. If the remainder contains
     no space to split on (a bare token), the root label is empty and
     the whole remainder is treated as the children string.
     """
     inner = s[1:-1].lstrip()
     parts = inner.split(' ', 1)
     if len(parts) == 2:
         root, children = parts
     else:
         root, children = '', inner
     label = root.strip(punctuation + ' ')
     return label, sexpr_tokenize(children.strip())
コード例 #3
0
def parse(root_sexpr):
    """Parse an S-expression tree into its root label and leaf words.

    Args:
      root_sexpr: String, a full S-expression like "(label (...) (...))".

    Returns:
      (label, tokens): the root node's label, and all leaf words joined
      into a single space-separated string, in left-to-right order.
    """
    label, sub_sexpr = root_sexpr[1:-1].split(None, 1)
    tokens = []
    stack = Stack()
    # Push the root's children in reverse so they pop off left-to-right.
    for sub_sexpr in reversed(sexpr.sexpr_tokenize(sub_sexpr)):
        stack.push(sub_sexpr)
    while not stack.empty:
        _, next_sexpr = stack.pop()[1:-1].split(None, 1)
        next_sexprs = sexpr.sexpr_tokenize(next_sexpr)
        # Leaf: a single token containing no brackets.
        # BUG FIX: the ')' check must inspect the token itself
        # (next_sexprs[0]); the original tested `')' not in next_sexprs`,
        # i.e. membership of the literal string ')' in the LIST, which is
        # (almost) always true and made the check a no-op.
        if len(next_sexprs) == 1 and ('(' not in next_sexprs[0]
                                      and ')' not in next_sexprs[0]):
            tokens.append(next_sexprs[0])
        # Otherwise, add the children to the stack in reverse order.
        else:
            for sub_sexpr in reversed(next_sexprs):
                stack.push(sub_sexpr)
    return label, ' '.join(tokens)
コード例 #4
0
ファイル: data.py プロジェクト: Qing1201/TCMSA
        def post_order(s):
            """Walk the binary S-expression *s* in post-order, collecting
            each leaf's word and label into the enclosing-scope lists
            (words, labels), and return this node's label."""
            node_label, body = s[1:-1].split(None, 1)
            children = sexpr.sexpr_tokenize(body)

            if len(children) == 2:
                # Internal node: recurse into left then right child.
                for child in children:
                    post_order(child)
            else:
                # Leaf: record its word and sentiment label.
                words.append(children[0])
                labels.append(node_label)

            return node_label
コード例 #5
0
ファイル: tree_batch.py プロジェクト: tungk/cstlstm
def tokenize(x):
    """Tokenizes S-expression dependency parse trees that come with NLI data.

    This one has been tested here:
    https://github.com/timniven/hsnli/blob/master/hsnli/tests/tree_sexpr_tests.py

    Args:
      x: String, the tree (or subtree) S-expression.

    Returns:
      String, List(String), Boolean: tag, [S-expression for the node], is_leaf
        flag indicating whether this node is a leaf.
    """
    inner = x[1:-1]
    if '(' in inner:
        # Internal node: first token is the tag, the rest are the
        # child S-expressions.
        parts = sexpr.sexpr_tokenize(inner)
        tag, data = parts[0], parts[1:]
    else:
        # Leaf node of the form "tag word".
        pieces = inner.split(' ')
        tag, data = pieces[0], [pieces[1]]
    single = len(data) == 1
    bracketed = single and data[0][0] == '(' and data[0][-1] == ')'
    return tag, data, single and not bracketed
コード例 #6
0
ファイル: sentiment.py プロジェクト: zhaogang92/fold
def tokenize(s):
    """Return (label, child S-expressions) for the S-expression *s*.

    sexpr_tokenize can't parse 'foo bar', only '(foo) (bar)', so the
    label is peeled off with split() first, which also handles the leaf
    case (e.g. 'label word').
    """
    inner = s[1:-1]
    label, phrase = inner.split(None, 1)
    children = sexpr.sexpr_tokenize(phrase)
    return label, children
コード例 #7
0
def tokenize(s):
    """Parse "(label/context/pos1/pos2 phrase)" into its label and a
    tuple of (child S-expressions, context, entity positions)."""
    header, phrase = s[1:-1].split(None, 1)
    # The header packs four slash-separated fields for classification.
    label, outer_context, ent1_posit, ent2_posit = header.split("/")
    subtrees = sexpr.sexpr_tokenize(phrase)
    return label, (subtrees, outer_context, ent1_posit, ent2_posit)
コード例 #8
0
 def tokenize(s):
     """Split the S-expression *s* into its label and tokenized children."""
     inner = s[1:-1]
     head, rest = inner.split(None, 1)
     return head, sexpr.sexpr_tokenize(rest)
コード例 #9
0
ファイル: sentiment.py プロジェクト: wangbosdqd/fold
def tokenize(s):
  """Return (label, child S-expressions) for *s*.

  sexpr_tokenize can't parse 'foo bar', only '(foo) (bar)', so split()
  peels the label off first, handling leaves like 'label word'.
  """
  body = s[1:-1]
  head, tail = body.split(None, 1)
  return head, sexpr.sexpr_tokenize(tail)
コード例 #10
0
ファイル: tree_lstm.py プロジェクト: gottalottarock/hlstm
 def tokenize(self, s):
     """Tokenize S-expression *s* after stripping its outer parentheses.

     An empty body (e.g. "()") yields [''] rather than an empty list.
     """
     body = s[1:-1].strip()
     if body:
         return sexpr_tokenize(body)
     return ['']