Ejemplo n.º 1
0
def CON_iRSib(parse_dict, constituent, i, constituents):
    """Numeric feature: number of right siblings of the connective node."""
    tree = constituent.syntax_tree
    token_indices = constituent.connective.token_indices
    conn_node = dict_util.get_conn_node(tree, token_indices)

    right_sibling_count = dict_util.get_CON_iRSib(tree, conn_node)

    return Feature("", 1, {1: right_sibling_count})
Ejemplo n.º 2
0
def CON_NT_Position(parse_dict, constituent, i, constituents):
    """Categorical feature: is the constituent left or right of the connective node."""
    position_vocab = {"right": 1, "left": 2}

    tree = constituent.syntax_tree
    token_indices = constituent.connective.token_indices
    conn_node = dict_util.get_conn_node(tree, token_indices)

    position = dict_util.get_CON_NT_Position(conn_node, constituent)
    return get_feature_by_feat(position_vocab, position)
Ejemplo n.º 3
0
def CON_NT_Path(parse_dict, constituent, i, constituents):
    """One-hot feature: syntactic path from connective node to the constituent."""
    path_vocab = NT_dict().dict_CON_NT_Path

    tree = constituent.syntax_tree
    token_indices = constituent.connective.token_indices
    conn_node = dict_util.get_conn_node(tree, token_indices)

    path = dict_util.get_CON_NT_Path(conn_node, constituent)
    return get_feature_by_feat(path_vocab, path)
Ejemplo n.º 4
0
def NT_parent_linked_ctx(parse_dict, constituent, i, constituents):
    """One-hot feature: linked context of the constituent's parent node."""
    vocab = NT_dict().dict_NT_parent_linked_ctx
    parent_ctx = dict_util.get_NT_parent_linked_ctx(constituent)
    return get_feature_by_feat(vocab, parent_ctx)
Ejemplo n.º 5
0
def NT_to_root_path(parse_dict, constituent, i, constituents):
    """One-hot feature: syntactic path from the constituent up to the tree root."""
    vocab = NT_dict().dict_NT_to_root_path
    root_path = dict_util.get_NT_to_root_path(constituent)
    return get_feature_by_feat(vocab, root_path)
Ejemplo n.º 6
0
def prev_curr_some_clause(parse_dict, constituent, i, constituents):
    """Binary feature: 1 if this constituent and the previous one (in the
    sorted candidate list) fall inside the same clause, else 0."""
    connective = constituent.connective
    DocID = connective.DocID
    sent_index = connective.sent_index

    # dict_clauses is a module-level cache of per-sentence clause token lists.
    cache_key = (DocID, sent_index)
    if cache_key not in dict_clauses:
        dict_clauses[cache_key] = dict_util.get_sent_clauses(
            parse_dict, DocID, sent_index)
    clauses_list = dict_clauses[cache_key]  # e.g. [[1, 2], [4, 5, 6]]

    # For each constituent, decide whether it shares a clause with the
    # immediately preceding constituent.
    same_clause = 0
    if i > 0:
        def clause_index(token_indices):
            # Index of the first clause fully containing token_indices, or -1.
            token_set = set(token_indices)
            for k, clause in enumerate(clauses_list):
                if token_set <= set(clause):
                    return k
            return -1

        curr_no = clause_index(constituents[i].indices)
        prev_no = clause_index(constituents[i - 1].indices)
        # Both must be found (!= -1) and identical.
        if curr_no != -1 and prev_no != -1 and curr_no == prev_no:
            same_clause = 1

    return Feature("", 1, {1: same_clause})
Ejemplo n.º 7
0
def NT_prev_curr_Path(parse_dict, constituent, i, constituents):
    """One-hot feature: tree path between the previous and current constituent."""
    vocab = NT_dict().dict_NT_prev_curr_Path
    prev_curr_path = dict_util.get_NT_prev_curr_Path(i, constituents)
    return get_feature_by_feat(vocab, prev_curr_path)
Ejemplo n.º 8
0
def CON_NT_Path_iLsib(parse_dict, constituent, i, constituents):
    """One-hot feature: CON->NT path combined with whether the connective
    node has more than one left sibling."""
    vocab = NT_dict().dict_CON_NT_Path_iLsib

    tree = constituent.syntax_tree
    token_indices = constituent.connective.token_indices
    conn_node = dict_util.get_conn_node(tree, token_indices)

    path = dict_util.get_CON_NT_Path(conn_node, constituent)
    left_siblings = dict_util.get_CON_iLSib(tree, conn_node)

    suffix = ":>1" if left_siblings > 1 else ":<=1"
    return get_feature_by_feat(vocab, path + suffix)
Ejemplo n.º 9
0
def all_features(parse_dict, constituent, i, constituents):
    """Merge the complete NT feature set for one constituent candidate:
    connective strings, context, paths, sibling counts, category and
    relative position. Feature order matches the original specification."""
    tree = constituent.syntax_tree
    connective = constituent.connective
    conn_category = Connectives_dict().conn_category

    # Learned vocabularies for the one-hot encoded features.
    vocab_CON_Str = NT_dict().dict_CON_Str
    vocab_CON_LStr = NT_dict().dict_CON_LStr
    vocab_NT_Ctx = NT_dict().dict_NT_Ctx
    vocab_CON_NT_Path = NT_dict().dict_CON_NT_Path
    vocab_CON_NT_Path_iLsib = NT_dict().dict_CON_NT_Path_iLsib

    # Raw feature values.
    conn_indices = connective.token_indices
    DocID = connective.DocID
    sent_index = connective.sent_index
    conn_node = dict_util.get_conn_node(tree, conn_indices)

    CON_Str = dict_util.get_CON_Str(parse_dict, DocID, sent_index,
                                    conn_indices)
    CON_LStr = CON_Str.lower()
    CON_Cat = conn_category[connective.name]
    CON_iLSib = dict_util.get_CON_iLSib(tree, conn_node)
    CON_iRSib = dict_util.get_CON_iRSib(tree, conn_node)
    NT_Ctx = dict_util.get_NT_Ctx(constituent)
    CON_NT_Path = dict_util.get_CON_NT_Path(conn_node, constituent)
    CON_NT_Position = dict_util.get_CON_NT_Position(conn_node, constituent)
    CON_NT_Path_iLsib = CON_NT_Path + (":>1" if CON_iLSib > 1 else ":<=1")

    # Fixed small vocabularies.
    category_vocab = {"subordinator": 1, "coordinator": 2, "adverbial": 3}
    position_vocab = {"right": 1, "left": 2}

    features = [
        get_feature({}, vocab_CON_Str, CON_Str),
        get_feature({}, vocab_CON_LStr, CON_LStr),
        get_feature({}, vocab_NT_Ctx, NT_Ctx),
        get_feature({}, vocab_CON_NT_Path, CON_NT_Path),
        get_feature({}, vocab_CON_NT_Path_iLsib, CON_NT_Path_iLsib),
        get_feature({}, category_vocab, CON_Cat),   # connective category
        Feature("", 1, {1: CON_iLSib}),             # left sibling count
        Feature("", 1, {1: CON_iRSib}),             # right sibling count
        get_feature({}, position_vocab, CON_NT_Position),
    ]
    return util.mergeFeatures(features)
Ejemplo n.º 10
0
def CON_Str(parse_dict, constituent, i, constituents):
    """One-hot feature: surface string of the connective."""
    vocab = NT_dict().dict_CON_Str

    conn = constituent.connective
    conn_string = dict_util.get_CON_Str(parse_dict, conn.DocID,
                                        conn.sent_index, conn.token_indices)
    return get_feature_by_feat(vocab, conn_string)
Ejemplo n.º 11
0
def parent_category(parse_dict, constituent, i, constituents):
    """One-hot feature: syntactic category of the connective's parent node."""
    vocab = NT_dict().dict_parent_category

    conn = constituent.connective
    category = dict_util.get_parent_category(parse_dict, conn.DocID,
                                             conn.sent_index,
                                             conn.token_indices)
    return get_feature_by_feat(vocab, category)
Ejemplo n.º 12
0
def conn_rightSiblingCtx(parse_dict, constituent, i, constituents):
    """One-hot feature: context of the connective's right sibling."""
    vocab = NT_dict().dict_conn_rightSiblingCtx

    conn = constituent.connective
    right_sibling_ctx = dict_util.get_conn_rightSiblingCtx(
        parse_dict, conn.DocID, conn.sent_index, conn.token_indices)
    return get_feature_by_feat(vocab, right_sibling_ctx)
Ejemplo n.º 13
0
def CParent_to_root_path(parse_dict, constituent, i, constituents):
    """One-hot feature: path from the connective's parent up to the tree root."""
    vocab = NT_dict().dict_CParent_to_root_path

    conn = constituent.connective
    root_path = dict_util.get_CParent_to_root_path(
        parse_dict, conn.DocID, conn.sent_index, conn.token_indices)
    return get_feature_by_feat(vocab, root_path)
Ejemplo n.º 14
0
def NT_conn_level_distance(parse_dict, constituent, i, constituents):
    """Numeric feature: tree-depth difference (connective depth minus
    constituent depth) measured from the syntax-tree root."""
    tree = constituent.syntax_tree
    nt_node = constituent.node
    conn_node = dict_util.get_conn_node(
        tree, constituent.connective.token_indices)

    root = tree.tree.get_tree_root()
    nt_depth = int(tree.tree.get_distance(root, nt_node))
    conn_depth = int(tree.tree.get_distance(root, conn_node))

    return Feature("", 1, {1: conn_depth - nt_depth})
Ejemplo n.º 15
0
def SS_parallel_not_parallel(inFile, trainOrTest, oa):
    """Split same-sentence (SS) relations into parallel / non-parallel
    connectives and extract per-constituent features for the non-parallel set.

    A connective is "parallel" when its token indices contain a gap of more
    than one position (discontinuous connectives such as "if ... then").

    Returns a 5-tuple: (totalConst, feature_set, SS_conns_parallel_list,
    SS_conns_not_parallel_list, parse_dict), where feature_set is the train
    set when trainOrTest == 'train' and the test set otherwise. Each entry
    of feature_set is a (feature_dict, label) pair with label in
    {"arg1", "arg2", "null"}.
    """
    ss_array, ps_array, parse_dict, dictByDocID = splitSSandPS(
        inFile, trainOrTest, oa)

    SS_conns_parallel_list = []
    SS_conns_not_parallel_list = []
    for dictEntry in ss_array:
        conn_indices = dictEntry[4]
        # Gap between any two consecutive token indices => discontinuous.
        parallel = any(b - a > 1
                       for a, b in zip(conn_indices, conn_indices[1:]))
        if parallel:
            SS_conns_parallel_list.append(dictEntry)
        else:
            SS_conns_not_parallel_list.append(dictEntry)

    # Wrap the raw non-parallel entries as Connective objects.
    connectives = []
    for rel_id, dictEntry in enumerate(SS_conns_not_parallel_list):
        conn = Connective(dictEntry[2], dictEntry[3],
                          dictEntry[4], dictEntry[5])
        conn.relation_ID = rel_id
        conn.Arg1_token_indices = dictEntry[8]
        conn.Arg2_token_indices = dictEntry[9]
        connectives.append(conn)

    # BUG FIX: conn_category was referenced below but its load was commented
    # out, raising NameError at runtime; restore it (mirrors all_features()).
    conn_category = Connectives_dict().conn_category

    trSet = []
    tSet = []
    count1 = 0  # constituents labeled arg1
    count2 = 0  # constituents labeled arg2
    count3 = 0  # constituents labeled null
    totalConst = []

    for connective in connectives:
        constituents = _get_constituents(parse_dict, connective)
        constituents = sorted(constituents,
                              key=lambda c: c.indices[0])

        # Extract features for each candidate constituent.
        for i, constituent in enumerate(constituents):
            totalConst.append(constituent)
            feature = dict()

            # Gold label: which argument (if any) fully contains this
            # constituent's token span.
            constIndices = set(constituent.get_indices())
            if constIndices.issubset(set(connective.Arg1_token_indices)):
                label = "arg1"
                count1 += 1
            elif constIndices.issubset(set(connective.Arg2_token_indices)):
                label = "arg2"
                count2 += 1
            else:
                label = "null"
                count3 += 1

            syntax_tree = constituent.syntax_tree
            # Use a distinct name so the outer loop's `connective` is not
            # shadowed (the original rebound it, a latent-bug smell).
            const_conn = constituent.connective
            conn_indices = const_conn.token_indices
            DocID = const_conn.DocID
            sent_index = const_conn.sent_index
            conn_node = dict_util.get_conn_node(syntax_tree, conn_indices)

            CON_Str = dict_util.get_CON_Str(parse_dict, DocID, sent_index,
                                            conn_indices)
            feature['f1'] = CON_Str
            CON_LStr = CON_Str.lower()
            feature['f2'] = CON_LStr
            CON_iLSib = dict_util.get_CON_iLSib(syntax_tree, conn_node)
            feature['f3'] = CON_iLSib
            feature['f4'] = dict_util.get_CON_iRSib(syntax_tree, conn_node)
            feature['f5'] = dict_util.get_NT_Ctx(constituent)
            CON_NT_Path = dict_util.get_CON_NT_Path(conn_node, constituent)
            feature['f6'] = CON_NT_Path
            feature['f7'] = dict_util.get_CON_NT_Position(conn_node,
                                                          constituent)
            # BUG FIX: dict.has_key() was removed in Python 3; use `in`.
            if CON_LStr in conn_category:
                CON_Cat = conn_category[CON_LStr]
            else:
                CON_Cat = ""
            feature['f8'] = CON_Cat

            # NOTE(review): 'f8' is immediately overwritten below, exactly as
            # in the original — CON_Cat never reaches the model. Possibly the
            # path feature was meant to be 'f9'; confirm before changing keys.
            if CON_iLSib > 1:
                feature['f8'] = CON_NT_Path + ":>1"
            else:
                feature['f8'] = CON_NT_Path + ":<=1"

            if trainOrTest == 'train':
                trSet.append((feature, label))
            else:
                tSet.append((feature, label))

    if trainOrTest == 'train':
        return (totalConst, trSet, SS_conns_parallel_list,
                SS_conns_not_parallel_list, parse_dict)
    else:
        return (totalConst, tSet, SS_conns_parallel_list,
                SS_conns_not_parallel_list, parse_dict)