Beispiel #1
0
def CON_iRSib(parse_dict, constituent, i, constituents):
    """Build the numeric CON_iRSib feature for a constituent's connective.

    Only ``constituent`` is read; ``parse_dict``, ``i`` and ``constituents``
    are not used here and are kept for the signature shared by the feature
    functions.

    Returns ``Feature("", 1, {1: value})`` where ``value`` is whatever
    ``dict_util.get_CON_iRSib`` yields for the connective node
    (presumably a right-sibling count -- TODO confirm).
    """
    tree = constituent.syntax_tree
    # Find the syntax-tree node that corresponds to the connective's tokens.
    node = dict_util.get_conn_node(tree, constituent.connective.token_indices)
    value = dict_util.get_CON_iRSib(tree, node)
    return Feature("", 1, {1: value})
def CON_iRSib(parse_dict, constituent, i, constituents):
    """Return the CON_iRSib value of the connective as a single-slot Feature.

    ``parse_dict``, ``i`` and ``constituents`` are accepted but not used.
    The value comes straight from ``dict_util.get_CON_iRSib``; its exact
    meaning is defined there (NOTE(review): likely the number of right
    siblings of the connective node -- confirm).
    """
    conn = constituent.connective
    tree = constituent.syntax_tree
    token_ids = conn.token_indices

    # Resolve the connective's node first; the sibling lookup needs it.
    conn_tree_node = dict_util.get_conn_node(tree, token_ids)
    sibling_value = dict_util.get_CON_iRSib(tree, conn_tree_node)

    return Feature("", 1, {1: sibling_value})
Beispiel #3
0
def CON_NT_Path(parse_dict, constituent, i, constituents):
    """Encode the connective-to-constituent path as a dictionary feature.

    The path value is obtained from ``dict_util.get_CON_NT_Path`` and
    encoded via ``get_feature_by_feat`` against
    ``NT_dict().dict_CON_NT_Path``. ``parse_dict``, ``i`` and
    ``constituents`` are not used.
    """
    # Lookup table used to encode the path value.
    path_table = NT_dict().dict_CON_NT_Path

    tree = constituent.syntax_tree
    node = dict_util.get_conn_node(tree, constituent.connective.token_indices)
    path = dict_util.get_CON_NT_Path(node, constituent)

    return get_feature_by_feat(path_table, path)
def CON_NT_Position(parse_dict, constituent, i, constituents):
    """Encode the connective/constituent relative position as a feature.

    The position string returned by ``dict_util.get_CON_NT_Position`` is
    encoded via ``get_feature_by_feat`` against the fixed table
    ``{"right": 1, "left": 2}``. ``parse_dict``, ``i`` and
    ``constituents`` are not used.
    """
    position_table = {"right": 1, "left": 2}

    tree = constituent.syntax_tree
    node = dict_util.get_conn_node(tree, constituent.connective.token_indices)
    position = dict_util.get_CON_NT_Position(node, constituent)

    return get_feature_by_feat(position_table, position)
Beispiel #5
0
def CON_NT_Position(parse_dict, constituent, i, constituents):
    """Return the position feature of the constituent w.r.t. its connective.

    Only ``constituent`` is consulted. The position label is produced by
    ``dict_util.get_CON_NT_Position`` and encoded against the two-entry
    table below by ``get_feature_by_feat``.
    """
    # Codes for the two position labels known to this feature.
    codes = {"right": 1, "left": 2}

    conn = constituent.connective
    syntax = constituent.syntax_tree
    conn_tree_node = dict_util.get_conn_node(syntax, conn.token_indices)

    label = dict_util.get_CON_NT_Position(conn_tree_node, constituent)
    return get_feature_by_feat(codes, label)
def CON_NT_Path(parse_dict, constituent, i, constituents):
    """Return the CON_NT_Path feature for ``constituent``.

    Reads the lookup table from ``NT_dict()``, resolves the connective's
    node in the constituent's syntax tree, asks ``dict_util`` for the
    path between that node and the constituent, and encodes the result
    with ``get_feature_by_feat``. The remaining parameters are not used.
    """
    table = NT_dict().dict_CON_NT_Path

    conn = constituent.connective
    syntax = constituent.syntax_tree
    conn_tree_node = dict_util.get_conn_node(syntax, conn.token_indices)

    path_value = dict_util.get_CON_NT_Path(conn_tree_node, constituent)
    return get_feature_by_feat(table, path_value)
def NT_parent_linked_ctx(parse_dict, constituent, i, constituents):
    """Encode the constituent's parent-linked context as a feature.

    The value from ``dict_util.get_NT_parent_linked_ctx(constituent)`` is
    encoded against ``NT_dict().dict_NT_parent_linked_ctx``.
    ``parse_dict``, ``i`` and ``constituents`` are not used.
    """
    table = NT_dict().dict_NT_parent_linked_ctx
    context = dict_util.get_NT_parent_linked_ctx(constituent)
    return get_feature_by_feat(table, context)
def NT_to_root_path(parse_dict, constituent, i, constituents):
    """Encode the constituent's path-to-root value as a feature.

    The value from ``dict_util.get_NT_to_root_path(constituent)`` is
    encoded against ``NT_dict().dict_NT_to_root_path``. ``parse_dict``,
    ``i`` and ``constituents`` are not used.
    """
    table = NT_dict().dict_NT_to_root_path
    root_path = dict_util.get_NT_to_root_path(constituent)
    return get_feature_by_feat(table, root_path)
Beispiel #9
0
def prev_curr_some_clause(parse_dict, constituent, i, constituents):
    """Flag whether constituent ``i`` and its predecessor share a clause.

    The clause list of the connective's sentence is fetched through
    ``dict_util.get_sent_clauses`` and memoised in the module-level
    ``dict_clauses`` under the key ``(DocID, sent_index)``. Each clause is
    a collection of indices, e.g. ``[[1, 2], [4, 5, 6]]``.

    Returns ``Feature("", 1, {1: flag})`` with ``flag`` equal to 1 when
    ``i > 0`` and both ``constituents[i]`` and ``constituents[i - 1]`` have
    their ``indices`` fully contained in the same (first matching) clause,
    otherwise 0.
    """
    conn = constituent.connective
    cache_key = (conn.DocID, conn.sent_index)

    # Compute the sentence's clauses once and reuse them on later calls.
    if cache_key not in dict_clauses:
        dict_clauses[cache_key] = dict_util.get_sent_clauses(
            parse_dict, cache_key[0], cache_key[1])
    clauses = dict_clauses[cache_key]

    def first_covering_clause(position):
        # Index of the first clause containing every index of
        # constituents[position], or -1 when no clause covers it.
        for clause_no, clause in enumerate(clauses):
            if set(constituents[position].indices) <= set(clause):
                return clause_no
        return -1

    # For each constituent, decide whether it lies in the same clause as
    # the constituent immediately before it.
    same_clause = 0
    if i > 0:
        current_no = first_covering_clause(i)
        previous_no = first_covering_clause(i - 1)
        if current_no != -1 and previous_no != -1 and current_no == previous_no:
            same_clause = 1

    return Feature("", 1, {1: same_clause})
def prev_curr_some_clause(parse_dict, constituent, i, constituents):
    """Return a 0/1 Feature: is constituent ``i`` in its predecessor's clause?

    Clauses for the connective's sentence come from
    ``dict_util.get_sent_clauses`` and are cached in the module-level
    ``dict_clauses`` keyed by ``(DocID, sent_index)``; a clause list looks
    like ``[[1, 2], [4, 5, 6]]``. The result is 1 only when ``i > 0`` and
    the first clause covering ``constituents[i].indices`` is also the first
    clause covering ``constituents[i - 1].indices``; otherwise it is 0.
    """
    conn = constituent.connective
    doc_id = conn.DocID
    sent_no = conn.sent_index
    key = (doc_id, sent_no)

    # Populate the cache on first use for this sentence.
    if key not in dict_clauses:
        dict_clauses[key] = dict_util.get_sent_clauses(parse_dict, doc_id, sent_no)
    sentence_clauses = dict_clauses[key]

    def locate(pos):
        # First clause number whose indices include all of
        # constituents[pos].indices; -1 if there is none.
        for number, members in enumerate(sentence_clauses):
            if set(constituents[pos].indices) <= set(members):
                return number
        return -1

    # For every constituent, check whether it sits in the same clause as
    # the preceding constituent.
    if i <= 0:
        return Feature("", 1, {1: 0})

    here = locate(i)
    before = locate(i - 1)
    shared = 1 if (here != -1 and before != -1 and here == before) else 0
    return Feature("", 1, {1: shared})
def NT_prev_curr_Path(parse_dict, constituent, i, constituents):
    """Encode the previous-to-current constituent path as a feature.

    The value from ``dict_util.get_NT_prev_curr_Path(i, constituents)`` is
    encoded against ``NT_dict().dict_NT_prev_curr_Path``. ``parse_dict``
    and ``constituent`` are not used.
    """
    table = NT_dict().dict_NT_prev_curr_Path
    path = dict_util.get_NT_prev_curr_Path(i, constituents)
    return get_feature_by_feat(table, path)
Beispiel #12
0
def NT_parent_linked_ctx(parse_dict, constituent, i, constituents):
    """Return the NT_parent_linked_ctx feature for ``constituent``.

    Looks up ``NT_dict().dict_NT_parent_linked_ctx``, then encodes the
    value computed by ``dict_util.get_NT_parent_linked_ctx`` with
    ``get_feature_by_feat``. Only ``constituent`` is read.
    """
    lookup = NT_dict().dict_NT_parent_linked_ctx
    linked_ctx = dict_util.get_NT_parent_linked_ctx(constituent)
    return get_feature_by_feat(lookup, linked_ctx)
Beispiel #13
0
def NT_prev_curr_Path(parse_dict, constituent, i, constituents):
    """Return the NT_prev_curr_Path feature for position ``i``.

    Looks up ``NT_dict().dict_NT_prev_curr_Path``, then encodes the value
    computed by ``dict_util.get_NT_prev_curr_Path(i, constituents)`` with
    ``get_feature_by_feat``. ``parse_dict`` and ``constituent`` are unused.
    """
    lookup = NT_dict().dict_NT_prev_curr_Path
    prev_curr = dict_util.get_NT_prev_curr_Path(i, constituents)
    return get_feature_by_feat(lookup, prev_curr)
Beispiel #14
0
def NT_to_root_path(parse_dict, constituent, i, constituents):
    """Return the NT_to_root_path feature for ``constituent``.

    Looks up ``NT_dict().dict_NT_to_root_path``, then encodes the value
    computed by ``dict_util.get_NT_to_root_path`` with
    ``get_feature_by_feat``. Only ``constituent`` is read.
    """
    lookup = NT_dict().dict_NT_to_root_path
    to_root = dict_util.get_NT_to_root_path(constituent)
    return get_feature_by_feat(lookup, to_root)
def CON_NT_Path_iLsib(parse_dict, constituent, i, constituents):
    """Encode the connective path combined with a CON_iLSib bucket.

    The path from ``dict_util.get_CON_NT_Path`` is suffixed with ``":>1"``
    when ``dict_util.get_CON_iLSib`` returns a value greater than 1, and
    with ``":<=1"`` otherwise. The combined string is encoded against
    ``NT_dict().dict_CON_NT_Path_iLsib``. ``parse_dict``, ``i`` and
    ``constituents`` are not used.
    """
    table = NT_dict().dict_CON_NT_Path_iLsib

    tree = constituent.syntax_tree
    node = dict_util.get_conn_node(tree, constituent.connective.token_indices)

    path = dict_util.get_CON_NT_Path(node, constituent)
    ilsib = dict_util.get_CON_iLSib(tree, node)

    # Two buckets only: strictly more than one, or at most one.
    bucket = ":>1" if ilsib > 1 else ":<=1"
    return get_feature_by_feat(table, path + bucket)
Beispiel #16
0
def CON_NT_Path_iLsib(parse_dict, constituent, i, constituents):
    """Return the CON_NT_Path_iLsib feature for ``constituent``.

    Builds the key ``<path>:>1`` or ``<path>:<=1`` from the connective
    path (``dict_util.get_CON_NT_Path``) and the CON_iLSib value
    (``dict_util.get_CON_iLSib``), then encodes it with
    ``get_feature_by_feat`` against ``NT_dict().dict_CON_NT_Path_iLsib``.
    Only ``constituent`` is read.
    """
    lookup = NT_dict().dict_CON_NT_Path_iLsib

    conn = constituent.connective
    syntax = constituent.syntax_tree
    conn_tree_node = dict_util.get_conn_node(syntax, conn.token_indices)

    path_value = dict_util.get_CON_NT_Path(conn_tree_node, constituent)
    left_value = dict_util.get_CON_iLSib(syntax, conn_tree_node)

    # Anything not strictly above one falls into the "<=1" bucket.
    if not left_value > 1:
        key = path_value + ":<=1"
    else:
        key = path_value + ":>1"

    return get_feature_by_feat(lookup, key)
Beispiel #17
0
def all_features(parse_dict, constituent, i, constituents):
    """Compute the full feature set for ``constituent`` and merge it.

    Features are produced in this order and combined with
    ``util.mergeFeatures``: CON_Str, lower-cased CON_Str, NT_Ctx,
    CON_NT_Path, CON_NT_Path combined with a CON_iLSib bucket, connective
    category, CON_iLSib, CON_iRSib and CON_NT_Position. ``i`` and
    ``constituents`` are not used.
    """
    tree = constituent.syntax_tree
    category_of = Connectives_dict().conn_category
    conn = constituent.connective

    # Fresh dicts handed to get_feature as its first argument.
    acc_str = {}
    acc_lstr = {}
    acc_ctx = {}
    acc_path = {}
    acc_path_ilsib = {}

    # Lookup tables from NT_dict.
    table_str = NT_dict().dict_CON_Str
    table_lstr = NT_dict().dict_CON_LStr
    table_ctx = NT_dict().dict_NT_Ctx
    table_path = NT_dict().dict_CON_NT_Path
    table_path_ilsib = NT_dict().dict_CON_NT_Path_iLsib

    # Raw feature values.
    token_ids = conn.token_indices
    doc_id = conn.DocID
    sent_no = conn.sent_index

    node = dict_util.get_conn_node(tree, token_ids)

    conn_str = dict_util.get_CON_Str(parse_dict, doc_id, sent_no, token_ids)
    conn_lstr = conn_str.lower()
    category = category_of[conn.name]
    ilsib = dict_util.get_CON_iLSib(tree, node)
    irsib = dict_util.get_CON_iRSib(tree, node)
    ctx = dict_util.get_NT_Ctx(constituent)
    path = dict_util.get_CON_NT_Path(node, constituent)
    position = dict_util.get_CON_NT_Position(node, constituent)
    path_ilsib = path + (":>1" if ilsib > 1 else ":<=1")

    category_codes = {"subordinator": 1, "coordinator": 2, "adverbial": 3}
    position_codes = {"right": 1, "left": 2}

    # The list order is the order in which the features get merged.
    collected = [
        get_feature(acc_str, table_str, conn_str),
        get_feature(acc_lstr, table_lstr, conn_lstr),
        get_feature(acc_ctx, table_ctx, ctx),
        get_feature(acc_path, table_path, path),
        get_feature(acc_path_ilsib, table_path_ilsib, path_ilsib),
        # category
        get_feature({}, category_codes, category),
        # numeric values
        Feature("", 1, {1: ilsib}),
        Feature("", 1, {1: irsib}),
        # position
        get_feature({}, position_codes, position),
    ]

    return util.mergeFeatures(collected)
def CON_Str(parse_dict, constituent, i, constituents):
    """Encode the connective string of ``constituent`` as a feature.

    The string is obtained from ``dict_util.get_CON_Str`` using the
    connective's ``DocID``, ``sent_index`` and ``token_indices`` and is
    encoded against ``NT_dict().dict_CON_Str``. ``i`` and ``constituents``
    are not used.
    """
    table = NT_dict().dict_CON_Str

    conn = constituent.connective
    doc_id, sent_no, token_ids = conn.DocID, conn.sent_index, conn.token_indices
    text = dict_util.get_CON_Str(parse_dict, doc_id, sent_no, token_ids)

    return get_feature_by_feat(table, text)
def right_sibling_category(parse_dict, constituent, i, constituents):
    """Encode the connective's right-sibling category as a feature.

    The value from ``dict_util.get_right_sibling_category`` (called with
    the connective's ``DocID``, ``sent_index`` and ``token_indices``) is
    encoded against ``NT_dict().dict_right_sibling_category``. ``i`` and
    ``constituents`` are not used.
    """
    table = NT_dict().dict_right_sibling_category

    conn = constituent.connective
    doc_id, sent_no, token_ids = conn.DocID, conn.sent_index, conn.token_indices
    category = dict_util.get_right_sibling_category(
        parse_dict, doc_id, sent_no, token_ids)

    return get_feature_by_feat(table, category)
def CParent_to_root_path_node_names(parse_dict, constituent, i, constituents):
    """Encode the CParent-to-root node names as a list-valued feature.

    The value from ``dict_util.get_CParent_to_root_path_node_names`` is
    passed to ``get_feature_by_feat_list`` together with
    ``NT_dict().dict_CParent_to_root_path_node_names`` (presumably a list
    of node names -- TODO confirm against ``dict_util``). ``i`` and
    ``constituents`` are not used.
    """
    table = NT_dict().dict_CParent_to_root_path_node_names

    conn = constituent.connective
    doc_id, sent_no, token_ids = conn.DocID, conn.sent_index, conn.token_indices
    node_names = dict_util.get_CParent_to_root_path_node_names(
        parse_dict, doc_id, sent_no, token_ids)

    return get_feature_by_feat_list(table, node_names)
def conn_rightSiblingCtx(parse_dict, constituent, i, constituents):
    """Encode the connective's right-sibling context as a feature.

    The value from ``dict_util.get_conn_rightSiblingCtx`` (called with the
    connective's ``DocID``, ``sent_index`` and ``token_indices``) is
    encoded against ``NT_dict().dict_conn_rightSiblingCtx``. ``i`` and
    ``constituents`` are not used.
    """
    table = NT_dict().dict_conn_rightSiblingCtx

    conn = constituent.connective
    doc_id, sent_no, token_ids = conn.DocID, conn.sent_index, conn.token_indices
    sibling_ctx = dict_util.get_conn_rightSiblingCtx(
        parse_dict, doc_id, sent_no, token_ids)

    return get_feature_by_feat(table, sibling_ctx)
Beispiel #22
0
def CON_Str(parse_dict, constituent, i, constituents):
    """Return the CON_Str feature for ``constituent``.

    Reads ``NT_dict().dict_CON_Str``, asks ``dict_util.get_CON_Str`` for
    the connective string identified by the connective's document id,
    sentence index and token indices, and encodes it with
    ``get_feature_by_feat``. ``i`` and ``constituents`` are unused.
    """
    lookup = NT_dict().dict_CON_Str

    conn = constituent.connective
    document = conn.DocID
    sentence = conn.sent_index
    tokens = conn.token_indices

    conn_text = dict_util.get_CON_Str(parse_dict, document, sentence, tokens)
    return get_feature_by_feat(lookup, conn_text)
Beispiel #23
0
def conn_rightSiblingCtx(parse_dict, constituent, i, constituents):
    """Return the conn_rightSiblingCtx feature for ``constituent``.

    Reads ``NT_dict().dict_conn_rightSiblingCtx``, obtains the context
    value from ``dict_util.get_conn_rightSiblingCtx`` and encodes it with
    ``get_feature_by_feat``. ``i`` and ``constituents`` are unused.
    """
    lookup = NT_dict().dict_conn_rightSiblingCtx

    conn = constituent.connective
    document = conn.DocID
    sentence = conn.sent_index
    tokens = conn.token_indices

    ctx_value = dict_util.get_conn_rightSiblingCtx(
        parse_dict, document, sentence, tokens)
    return get_feature_by_feat(lookup, ctx_value)
Beispiel #24
0
def parent_category(parse_dict, constituent, i, constituents):
    """Encode the connective's parent category as a feature.

    The value from ``dict_util.get_parent_category`` (called with the
    connective's ``DocID``, ``sent_index`` and ``token_indices``) is
    encoded against ``NT_dict().dict_parent_category``. ``i`` and
    ``constituents`` are not used.
    """
    table = NT_dict().dict_parent_category

    conn = constituent.connective
    doc_id, sent_no, token_ids = conn.DocID, conn.sent_index, conn.token_indices
    category = dict_util.get_parent_category(parse_dict, doc_id, sent_no, token_ids)

    return get_feature_by_feat(table, category)
Beispiel #25
0
def CParent_to_root_path(parse_dict, constituent, i, constituents):
    """Encode the CParent-to-root path of the connective as a feature.

    The value from ``dict_util.get_CParent_to_root_path`` (called with the
    connective's ``DocID``, ``sent_index`` and ``token_indices``) is
    encoded against ``NT_dict().dict_CParent_to_root_path``. ``i`` and
    ``constituents`` are not used.
    """
    table = NT_dict().dict_CParent_to_root_path

    conn = constituent.connective
    doc_id, sent_no, token_ids = conn.DocID, conn.sent_index, conn.token_indices
    root_path = dict_util.get_CParent_to_root_path(
        parse_dict, doc_id, sent_no, token_ids)

    return get_feature_by_feat(table, root_path)
def NT_conn_level_distance(parse_dict, constituent, i, constituents):
    """Build a numeric feature from two root distances in the syntax tree.

    Using ``syntax_tree.tree.get_distance`` from the tree root, the
    distance to the constituent's node is subtracted from the distance to
    the connective's node (both truncated with ``int``). The difference is
    returned as ``Feature("", 1, {1: difference})``. ``parse_dict``, ``i``
    and ``constituents`` are not used.
    """
    tree_wrapper = constituent.syntax_tree
    constituent_node = constituent.node

    connective_node = dict_util.get_conn_node(
        tree_wrapper, constituent.connective.token_indices)

    root = tree_wrapper.tree.get_tree_root()

    # Distances are measured from the root for both nodes.
    constituent_depth = int(tree_wrapper.tree.get_distance(root, constituent_node))
    connective_depth = int(tree_wrapper.tree.get_distance(root, connective_node))

    difference = connective_depth - constituent_depth
    return Feature("", 1, {1: difference})
Beispiel #27
0
def NT_conn_level_distance(parse_dict, constituent, i, constituents):
    """Return connective-minus-constituent root distance as a Feature.

    Both distances are taken from the syntax tree's root via
    ``syntax_tree.tree.get_distance`` and converted with ``int`` before
    the subtraction. Only ``constituent`` is read.
    """
    syntax = constituent.syntax_tree
    nt = constituent.node

    conn = constituent.connective
    conn_tree_node = dict_util.get_conn_node(syntax, conn.token_indices)

    top = syntax.tree.get_tree_root()

    def depth_of(target):
        # Integer distance between the tree root and ``target``.
        return int(syntax.tree.get_distance(top, target))

    nt_depth = depth_of(nt)
    conn_depth = depth_of(conn_tree_node)

    return Feature("", 1, {1: conn_depth - nt_depth})
def all_features(parse_dict, constituent, i, constituents):
    """Return every feature of ``constituent`` merged into one.

    Five dictionary-encoded features (CON_Str, lower-cased CON_Str,
    NT_Ctx, CON_NT_Path, CON_NT_Path with a CON_iLSib bucket) are followed
    by the connective category, the raw CON_iLSib and CON_iRSib values and
    the connective/constituent position. The list is combined with
    ``util.mergeFeatures``. ``i`` and ``constituents`` are not used.
    """
    syntax = constituent.syntax_tree
    categories = Connectives_dict().conn_category
    conn = constituent.connective

    # Empty dicts passed as the first argument of get_feature.
    holder_str, holder_lstr, holder_ctx = {}, {}, {}
    holder_path, holder_path_ilsib = {}, {}

    # Lookup tables, one NT_dict() call per table.
    lookup_str = NT_dict().dict_CON_Str
    lookup_lstr = NT_dict().dict_CON_LStr
    lookup_ctx = NT_dict().dict_NT_Ctx
    lookup_path = NT_dict().dict_CON_NT_Path
    lookup_path_ilsib = NT_dict().dict_CON_NT_Path_iLsib

    # Connective identification.
    tokens = conn.token_indices
    document = conn.DocID
    sentence = conn.sent_index

    conn_tree_node = dict_util.get_conn_node(syntax, tokens)

    # Raw values, computed before any feature object is built.
    text = dict_util.get_CON_Str(parse_dict, document, sentence, tokens)
    text_lower = text.lower()
    conn_cat = categories[conn.name]
    left_value = dict_util.get_CON_iLSib(syntax, conn_tree_node)
    right_value = dict_util.get_CON_iRSib(syntax, conn_tree_node)
    nt_ctx = dict_util.get_NT_Ctx(constituent)
    nt_path = dict_util.get_CON_NT_Path(conn_tree_node, constituent)
    nt_position = dict_util.get_CON_NT_Position(conn_tree_node, constituent)
    if left_value > 1:
        nt_path_bucketed = nt_path + ":>1"
    else:
        nt_path_bucketed = nt_path + ":<=1"

    # Dictionary-encoded features, in merge order.
    encoded_inputs = (
        (holder_str, lookup_str, text),
        (holder_lstr, lookup_lstr, text_lower),
        (holder_ctx, lookup_ctx, nt_ctx),
        (holder_path, lookup_path, nt_path),
        (holder_path_ilsib, lookup_path_ilsib, nt_path_bucketed),
    )
    merged = [get_feature(holder, lookup, value)
              for holder, lookup, value in encoded_inputs]

    # category
    category_codes = {"subordinator": 1, "coordinator": 2, "adverbial": 3}
    merged.append(get_feature({}, category_codes, conn_cat))
    # numeric values
    merged.append(Feature("", 1, {1: left_value}))
    merged.append(Feature("", 1, {1: right_value}))
    # position
    position_codes = {"right": 1, "left": 2}
    merged.append(get_feature({}, position_codes, nt_position))

    return util.mergeFeatures(merged)