def get_inquirer_vec(relation, Arg, parse_dict):
    """Accumulate the inquirer entries of the verbs found in one argument.

    Items produced by ``_get_Arg_word_pos_list`` look like ``"TAG/word"``
    (e.g. ``"NN/dog"``).  Only items whose tag is one of the verb/modal
    tags are used.  The lower-cased word is looked up in the inquirer
    lexicon; on a miss its stem (``util.stem_string``) is tried in the same
    lexicon first and in the stemmed lexicon second.  Every hit is folded
    into the running vector with ``_merge``.

    :param relation: relation forwarded to ``_get_Arg_word_pos_list``.
    :param Arg: argument selector forwarded to the same helper.
    :param parse_dict: parse data forwarded to the same helper.
    :return: the vector obtained by merging every matched entry into an
        initial list of 42 zeros (that list unchanged when nothing matches).
    :raises ValueError: if a non-empty item contains no ``"/"`` separator.
    """
    # NOTE(review): the lexicon object is created once here instead of once
    # per attribute; this assumes Non_Explicit_dict() is shared or cheap --
    # confirm against its definition.
    lexicon = Non_Explicit_dict()
    inquirer = lexicon.inquirer
    inquirer_stem = lexicon.inquirer_stem

    # Named ``vec`` (not ``list``) so the builtin is not shadowed.
    vec = [0] * 42
    verb_tags = ("MD", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ")

    for item in _get_Arg_word_pos_list(relation, Arg, parse_dict):
        if item == "":
            continue
        tag, word = item.split("/")[:2]
        if tag not in verb_tags:
            continue

        word = word.lower()
        # Membership is tested on the mapping itself; going through .keys()
        # adds nothing and builds a throwaway list on Python 2.
        if word in inquirer:
            vec = _merge(vec, inquirer[word])
            continue

        stem = util.stem_string(word)
        if stem in inquirer:
            vec = _merge(vec, inquirer[stem])
        elif stem in inquirer_stem:
            vec = _merge(vec, inquirer_stem[stem])
    return vec
def is_negate(wordlist):
    """Tell whether *wordlist* contains a negation word.

    A word counts as a negation when it is in the ``negate`` collection
    itself, or when its stem (``util.stem_string``) is in ``negate`` or in
    ``negate_stem``.

    :param wordlist: iterable of words to inspect.
    :return: ``True`` at the first matching word, ``False`` if none match.
    """
    negation_words = Non_Explicit_dict().negate
    negation_stems = Non_Explicit_dict().negate_stem

    for token in wordlist:
        if token in negation_words:
            return True
        # The stem is only computed when the surface form did not match.
        stemmed = util.stem_string(token)
        if stemmed in negation_words or stemmed in negation_stems:
            return True
    return False
def firstlast_first3(relation, parse_dict):
    """Build the first/last/first-three word features of a relation.

    ``dict_util.get_firstlast_first3`` yields eight values (Arg1 first,
    Arg1 last, Arg2 first, Arg2 last, the first-first pair, the last-last
    pair, Arg1 first three, Arg2 first three).  Each value is passed with
    its matching lookup table to ``get_feature_by_feat`` and the eight
    results are combined with ``util.mergeFeatures``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``util.mergeFeatures``.
    """
    # Lookup tables, in the same order as the extracted values below.
    lookup_tables = [
        Non_Explicit_dict().dict_Arg1_first,
        Non_Explicit_dict().dict_Arg1_last,
        Non_Explicit_dict().dict_Arg2_first,
        Non_Explicit_dict().dict_Arg2_last,
        Non_Explicit_dict().dict_Arg1_first_Arg2_first,
        Non_Explicit_dict().dict_Arg1_last_Arg2_last,
        Non_Explicit_dict().dict_Arg1_first3,
        Non_Explicit_dict().dict_Arg2_first3,
    ]

    # Explicit eight-way unpacking keeps the arity check of the extractor's
    # return value.
    (arg1_first, arg1_last, arg2_first, arg2_last,
     first_pair, last_pair,
     arg1_first3, arg2_first3) = dict_util.get_firstlast_first3(relation, parse_dict)

    extracted = [
        arg1_first, arg1_last, arg2_first, arg2_last,
        first_pair, last_pair,
        arg1_first3, arg2_first3,
    ]

    features = [
        get_feature_by_feat(table, value)
        for table, value in zip(lookup_tables, extracted)
    ]
    return util.mergeFeatures(features)
def word2vec_cluster_pair(relation, parse_dict):
    """Build the word2vec-cluster-pair feature of a relation.

    The pairs come from ``dict_util.get_word2vec_cluster_pairs`` and are
    mapped through the ``dict_word2vec_cluster_pairs`` table by
    ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    pair_table = Non_Explicit_dict().dict_word2vec_cluster_pairs
    extracted_pairs = dict_util.get_word2vec_cluster_pairs(relation, parse_dict)
    return get_feature_by_feat_list(pair_table, extracted_pairs)
def verb_pair(relation, parse_dict):
    """Build the verb-pair feature of a relation.

    The verb pairs come from ``dict_util.get_verb_pair`` and are mapped
    through the ``dict_verb_pair`` table by ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    # Arguments are evaluated left to right: table first, extraction second.
    return get_feature_by_feat_list(
        Non_Explicit_dict().dict_verb_pair,
        dict_util.get_verb_pair(relation, parse_dict),
    )
def arg2_tense(relation, parse_dict):
    """Build the Arg2-tense feature of a relation.

    The value comes from ``dict_util.get_arg2_tense`` and is mapped through
    the ``dict_arg2_tense`` table by ``get_feature_by_feat``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat``.
    """
    tense_table = Non_Explicit_dict().dict_arg2_tense
    tense_value = dict_util.get_arg2_tense(relation, parse_dict)
    return get_feature_by_feat(tense_table, tense_value)
def arg2_first3_conn(relation, parse_dict):
    """Build the ``arg2_first3_conn`` feature of a relation.

    The value comes from ``dict_util.get_arg2_first3_conn`` and is mapped
    through the ``dict_arg2_first3_conn`` table by ``get_feature_by_feat``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat``.
    """
    # Arguments are evaluated left to right: table first, extraction second.
    return get_feature_by_feat(
        Non_Explicit_dict().dict_arg2_first3_conn,
        dict_util.get_arg2_first3_conn(relation, parse_dict),
    )
def prev_next_context_conn(relation, parse_dict, implicit_context_dict):
    """Build the ``prev_next_context_conn`` feature of a relation.

    The value comes from ``dict_util.get_prev_next_context_conn`` and is
    mapped through the ``dict_prev_next_context_conn`` table by
    ``get_feature_by_feat``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :param implicit_context_dict: context data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat``.
    """
    context_table = Non_Explicit_dict().dict_prev_next_context_conn
    context_value = dict_util.get_prev_next_context_conn(
        relation, parse_dict, implicit_context_dict
    )
    return get_feature_by_feat(context_table, context_value)
def brown_cluster_pair(relation, parse_dict):
    """Build the Brown-cluster-pair feature of a relation.

    The pairs come from ``dict_util.get_brown_cluster_pairs`` and are
    mapped through the ``dict_brown_cluster`` table by
    ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    cluster_pair_table = Non_Explicit_dict().dict_brown_cluster
    extracted_pairs = dict_util.get_brown_cluster_pairs(relation, parse_dict)
    return get_feature_by_feat_list(cluster_pair_table, extracted_pairs)
def main_verb_pair(relation, parse_dict):
    """Build the main-verb-pair feature of a relation.

    The value comes from ``dict_util.get_main_verb_pair`` and is mapped
    through the ``dict_main_verb_pair`` table by ``get_feature_by_feat``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat``.
    """
    # Arguments are evaluated left to right: table first, extraction second.
    return get_feature_by_feat(
        Non_Explicit_dict().dict_main_verb_pair,
        dict_util.get_main_verb_pair(relation, parse_dict),
    )
def verbs(relation, parse_dict):
    """Build the verb-based features of a relation.

    Two features are merged with ``util.mergeFeatures``:

    1. The number of (Arg1 word, Arg2 word) pairs, compared in lower case,
       where both words are in the verb-class dictionary and their
       ``#``-separated class lists share at least one class.
    2. The main-verb POS values of Arg1 followed by those of Arg2
       (``dict_util.get_main_verb_pos``), converted by
       ``get_feature_by_list``.

    :param relation: relation forwarded to the ``dict_util`` extractors.
    :param parse_dict: parse data forwarded to the ``dict_util`` extractors.
    :return: the value returned by ``util.mergeFeatures``.
    """
    dict_verb_classes = Non_Explicit_dict().dict_verb_classes

    # 1. the number of pairs of verbs in Arg1 and Arg2 from the same class
    Arg1_words = dict_util.get_Arg_Words_List(relation, "Arg1", parse_dict)
    Arg2_words = dict_util.get_Arg_Words_List(relation, "Arg2", parse_dict)

    def class_sets(words):
        # One class set per word that is in the verb-class dictionary.
        # Duplicated words are kept so that pairs are counted with
        # multiplicity, exactly as in a plain nested loop over the words.
        found = []
        for word in words:
            key = word.lower()
            if key in dict_verb_classes:
                found.append(set(dict_verb_classes[key].split("#")))
        return found

    # Lower-casing, dictionary lookup and splitting are done once per word
    # rather than once per pair; the pair loop only tests for overlap.
    arg1_class_sets = class_sets(Arg1_words)
    arg2_class_sets = class_sets(Arg2_words)
    count = sum(
        1
        for classes1 in arg1_class_sets
        for classes2 in arg2_class_sets
        if not classes1.isdisjoint(classes2)
    )
    feat_1 = Feature("", 1, {1: count})

    # 2. POS of main verb
    Arg1_MV_POS = dict_util.get_main_verb_pos(relation, "Arg1", parse_dict)
    Arg2_MV_POS = dict_util.get_main_verb_pos(relation, "Arg2", parse_dict)
    MV_POS_feature = get_feature_by_list(Arg1_MV_POS + Arg2_MV_POS)

    return util.mergeFeatures([feat_1, MV_POS_feature])
def prev_context_conn_sense(relation, parse_dict, implicit_context_dict):
    """Build the ``prev_context_conn_sense`` feature of a relation.

    The value comes from ``dict_util.get_prev_context_conn_sense`` and is
    mapped through the ``dict_prev_context_conn_sense`` table by
    ``get_feature_by_feat``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :param implicit_context_dict: context data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat``.
    """
    sense_table = Non_Explicit_dict().dict_prev_context_conn_sense
    sense_value = dict_util.get_prev_context_conn_sense(
        relation, parse_dict, implicit_context_dict
    )
    return get_feature_by_feat(sense_table, sense_value)
def arg_first3_conn_pair(relation, parse_dict):
    """Build the ``arg_first3_conn_pair`` feature of a relation.

    The value comes from ``dict_util.get_arg_first3_conn_pair`` and is
    mapped through the ``dict_arg_first3_conn_pair`` table by
    ``get_feature_by_feat``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat``.
    """
    # Arguments are evaluated left to right: table first, extraction second.
    return get_feature_by_feat(
        Non_Explicit_dict().dict_arg_first3_conn_pair,
        dict_util.get_arg_first3_conn_pair(relation, parse_dict),
    )
def cp_production_rules(relation, parse_dict):
    """Build the ``cp_production_rules`` feature of a relation.

    The rule strings come from ``dict_util.get_cp_production_rules`` and
    are mapped through the ``cp_production_rules`` table by
    ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    rule_table = Non_Explicit_dict().cp_production_rules
    # Extracted items look like ["a|b", "b|e"].
    extracted_rules = dict_util.get_cp_production_rules(relation, parse_dict)
    return get_feature_by_feat_list(rule_table, extracted_rules)
def word_pairs(relation, parse_dict):
    """Build the word-pair feature of a relation.

    The pair strings come from ``dict_util.get_word_pairs`` and are mapped
    through the ``dict_word_pairs`` table by ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    pair_table = Non_Explicit_dict().dict_word_pairs
    # Extracted items look like ["a|b", "b|e"].
    extracted_pairs = dict_util.get_word_pairs(relation, parse_dict)
    return get_feature_by_feat_list(pair_table, extracted_pairs)
def get_Arg_brown_cluster(relation, Arg, parse_dict):
    """Collect the Brown cluster of every known word of one argument.

    Words come from ``get_Arg_Words_List``; words missing from the
    ``brown_cluster`` mapping are skipped.  Order and repetitions of the
    remaining words are kept.

    :param relation: relation forwarded to ``get_Arg_Words_List``.
    :param Arg: argument selector forwarded to ``get_Arg_Words_List``.
    :param parse_dict: parse data forwarded to ``get_Arg_Words_List``.
    :return: list of the mapped cluster values.
    """
    words = get_Arg_Words_List(relation, Arg, parse_dict)
    cluster_of = Non_Explicit_dict().brown_cluster
    return [cluster_of[token] for token in words if token in cluster_of]
def get_polarity(word):
    """Look up the polarity labels of *word*.

    The word itself is tried in the ``polarity`` mapping first.  On a miss
    its stem (``util.stem_string``) is tried in ``polarity`` and then in
    ``polarity_stem``; when both contain the stem, the ``polarity_stem``
    entry is the one used.

    :param word: word to look up.
    :return: the matched entry split on ``"|"``, or ``[]`` when nothing
        matched or the matched entry is the empty string.
    """
    polarity_by_word = Non_Explicit_dict().polarity
    polarity_by_stem = Non_Explicit_dict().polarity_stem

    if word in polarity_by_word:
        label = polarity_by_word[word]
    else:
        label = ""
        stemmed = util.stem_string(word)
        if stemmed in polarity_by_word:
            label = polarity_by_word[stemmed]
        # NOTE(review): this is a second ``if``, not an ``elif``, so a hit in
        # the stem mapping overrides the hit above -- confirm that this
        # precedence is intended.
        if stemmed in polarity_by_stem:
            label = polarity_by_stem[stemmed]

    if label == "":
        return []
    return label.split("|")
def get_inquirer_vec(relation, Arg, parse_dict):
    """Accumulate the inquirer entries of the verbs found in one argument.

    Items produced by ``_get_Arg_word_pos_list`` look like ``"TAG/word"``
    (e.g. ``"NN/dog"``).  Only items whose tag is one of the verb/modal
    tags are used.  The lower-cased word is looked up in the inquirer
    lexicon; on a miss its stem (``util.stem_string``) is tried in the same
    lexicon first and in the stemmed lexicon second.  Every hit is folded
    into the running vector with ``_merge``.

    :param relation: relation forwarded to ``_get_Arg_word_pos_list``.
    :param Arg: argument selector forwarded to the same helper.
    :param parse_dict: parse data forwarded to the same helper.
    :return: the vector obtained by merging every matched entry into an
        initial list of 42 zeros (that list unchanged when nothing matches).
    :raises ValueError: if a non-empty item contains no ``"/"`` separator.
    """
    # NOTE(review): the lexicon object is created once here instead of once
    # per attribute; this assumes Non_Explicit_dict() is shared or cheap --
    # confirm against its definition.
    lexicon = Non_Explicit_dict()
    inquirer = lexicon.inquirer
    inquirer_stem = lexicon.inquirer_stem

    # Named ``vec`` (not ``list``) so the builtin is not shadowed.
    vec = [0] * 42
    verb_tags = ("MD", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ")

    for item in _get_Arg_word_pos_list(relation, Arg, parse_dict):
        if item == "":
            continue
        tag, word = item.split("/")[:2]
        if tag not in verb_tags:
            continue

        word = word.lower()
        # Membership is tested on the mapping itself; going through .keys()
        # adds nothing and builds a throwaway list on Python 2.
        if word in inquirer:
            vec = _merge(vec, inquirer[word])
            continue

        stem = util.stem_string(word)
        if stem in inquirer:
            vec = _merge(vec, inquirer[stem])
        elif stem in inquirer_stem:
            vec = _merge(vec, inquirer_stem[stem])
    return vec
def get_word_MPQA_polarity(word_pos_tuple):
    """Look up the MPQA entry of a ``(word, pos)`` pair.

    Lookup order, first hit wins:

    1. ``word_pos_tuple`` in ``n_stemmed_word_pos_dict``
    2. ``(word, "anypos")`` in ``n_stemmed_word_pos_dict``
    3. ``(stem, pos)`` in ``y_stemmed_word_pos_dict``
    4. ``(stem, "anypos")`` in ``y_stemmed_word_pos_dict``

    where ``stem`` is ``util.stem_string(word)``.

    :param word_pos_tuple: two-item ``(word, pos)`` key.
    :return: the matched dictionary value, or ``("NULL", "NULL")`` when no
        lookup succeeds.
    """
    unstemmed_entries = Non_Explicit_dict().n_stemmed_word_pos_dict
    stemmed_entries = Non_Explicit_dict().y_stemmed_word_pos_dict

    word, pos = word_pos_tuple

    # The caller's own key object is used for the first lookup.
    for key in (word_pos_tuple, (word, "anypos")):
        if key in unstemmed_entries:
            return unstemmed_entries[key]

    # Fall back to the stemmed form of the word.
    stem = util.stem_string(word)
    for key in ((stem, pos), (stem, "anypos")):
        if key in stemmed_entries:
            return stemmed_entries[key]

    # no match
    return ("NULL", "NULL")
def dependency_rules(relation, parse_dict):
    """Build the dependency-rule features of a relation.

    Three rule lists are converted with ``get_feature_by_feat_list`` against
    the ``dict_dependency_rules`` table and merged, in this order: the
    rules of Arg1, the rules of Arg2, and the distinct rules present in
    both arguments.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``util.mergeFeatures``.
    """
    rule_table = Non_Explicit_dict().dict_dependency_rules

    arg1_rules = dict_util.get_Arg_dependency_rules(relation, "Arg1", parse_dict)
    arg2_rules = dict_util.get_Arg_dependency_rules(relation, "Arg2", parse_dict)
    shared_rules = list(set(arg1_rules) & set(arg2_rules))

    features = [
        get_feature_by_feat_list(rule_table, rules)
        for rules in (arg1_rules, arg2_rules, shared_rules)
    ]
    return util.mergeFeatures(features)
def get_word2vec_cluster_pairs(relation, parse_dict):
    """Pair up the word2vec clusters of Arg1 words with those of Arg2 words.

    Words missing from the ``word2vec_cluster`` mapping are ignored.  Every
    remaining Arg1 word is combined with every remaining Arg2 word, Arg1
    outermost, and each combination is rendered as ``"<c1>|<c2>"``.

    :param relation: relation forwarded to ``get_Arg_Words_List``.
    :param parse_dict: parse data forwarded to ``get_Arg_Words_List``.
    :return: list of ``"cluster1|cluster2"`` strings (repetitions kept).
    """
    Arg1_words = get_Arg_Words_List(relation, "Arg1", parse_dict)
    Arg2_words = get_Arg_Words_List(relation, "Arg2", parse_dict)

    dict_word2vec_cluster = Non_Explicit_dict().word2vec_cluster

    # Resolve each argument's clusters once; membership of a word does not
    # depend on the word it is paired with, so there is no reason to repeat
    # the lookups for every pair of the cross product.
    arg1_clusters = [
        dict_word2vec_cluster[word]
        for word in Arg1_words
        if word in dict_word2vec_cluster
    ]
    arg2_clusters = [
        dict_word2vec_cluster[word]
        for word in Arg2_words
        if word in dict_word2vec_cluster
    ]

    return [
        "%s|%s" % (cluster1, cluster2)
        for cluster1 in arg1_clusters
        for cluster2 in arg2_clusters
    ]
def production_rules(relation, parse_dict):
    """Build the production-rule feature of a relation.

    The rules of Arg1 are prefixed with ``Arg1_``, the rules of Arg2 with
    ``Arg2_``, and the distinct rules present in both arguments with
    ``Both_``.  The concatenation (in that order) is mapped through the
    ``dict_production_rules`` table by ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    rule_table = Non_Explicit_dict().dict_production_rules

    arg1_rules = dict_util.get_Arg_production_rules(relation, "Arg1", parse_dict)
    arg2_rules = dict_util.get_Arg_production_rules(relation, "Arg2", parse_dict)
    # The intersection is taken on the raw rules, before any prefixing.
    shared_rules = list(set(arg1_rules) & set(arg2_rules))

    labelled_rules = []
    for template, rules in (
        ("Arg1_%s", arg1_rules),
        ("Arg2_%s", arg2_rules),
        ("Both_%s", shared_rules),
    ):
        labelled_rules.extend([template % rule for rule in rules])

    return get_feature_by_feat_list(rule_table, labelled_rules)
def arg_brown_cluster(relation, parse_dict):
    """Build the per-argument Brown-cluster feature of a relation.

    The distinct clusters of the two arguments are split into three groups:
    only in Arg1 (prefixed ``Arg1_``), only in Arg2 (prefixed ``Arg2_``)
    and in both (prefixed ``Both_``).  The concatenation, in that order, is
    mapped through the ``dict_Arg_brown_cluster`` table by
    ``get_feature_by_feat_list``.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``get_feature_by_feat_list``.
    """
    cluster_table = Non_Explicit_dict().dict_Arg_brown_cluster

    arg1_clusters = dict_util.get_Arg_brown_cluster(relation, "Arg1", parse_dict)
    arg2_clusters = dict_util.get_Arg_brown_cluster(relation, "Arg2", parse_dict)

    arg1_set = set(arg1_clusters)
    arg2_set = set(arg2_clusters)

    labelled_clusters = []
    for template, members in (
        ("Arg1_%s", arg1_set - arg2_set),
        ("Arg2_%s", arg2_set - arg1_set),
        ("Both_%s", arg1_set & arg2_set),
    ):
        labelled_clusters.extend([template % member for member in members])

    return get_feature_by_feat_list(cluster_table, labelled_clusters)
def is_negate_MPQA(index, word_list):
    """Tell whether one of the three words before *index* is a negation.

    Positions before the start of ``word_list`` are represented by the
    placeholder ``"NULL"``.

    :param index: position of the current word in ``word_list``.
    :param word_list: sequence of words.
    :return: ``True`` when any of the three preceding entries (or the
        placeholder) is in the ``negate`` collection, ``False`` otherwise.
    """
    negation_words = Non_Explicit_dict().negate

    # Slot k holds the word k + 1 positions before ``index``.
    preceding = ["NULL", "NULL", "NULL"]
    for distance in (1, 2, 3):
        position = index - distance
        if position >= 0:
            preceding[distance - 1] = word_list[position]

    return set(preceding) & set(negation_words) != set([])
def _average_word2vec(words, dict_word2vec, dim=300):
    """Average the word2vec entries of the distinct words in *words*.

    :param words: iterable of words; duplicates are counted once.
    :param dict_word2vec: mapping from word to its vector.
    :param dim: length of the zero vector the accumulation starts from.
    :return: the accumulated vector divided element-wise by the number of
        words found in the mapping, or the zero vector when none is found.
    """
    total = [0.0] * dim
    hits = 0
    for word in set(words):
        if word in dict_word2vec:
            # presumably an element-wise sum -- see util.vec_plus_vec
            total = util.vec_plus_vec(total, dict_word2vec[word])
            hits += 1
    # Guard against dividing by zero when no word has a vector.
    if hits != 0:
        total = [value / hits for value in total]
    return total


def Arg_word2vec(relation, parse_dict):
    """Build the averaged word2vec features of Arg1 and Arg2.

    For each argument the lower-cased lemma words are de-duplicated, the
    vectors of those found in ``word2vec_dict`` are accumulated from a
    300-zero vector and averaged, and the result is converted with
    ``get_feature_by_list``.  The two features are then merged, Arg1 first.

    :param relation: relation forwarded to the extractor.
    :param parse_dict: parse data forwarded to the extractor.
    :return: the value returned by ``util.mergeFeatures``.
    """
    dict_word2vec = Non_Explicit_dict().word2vec_dict

    Arg1_words = dict_util._get_lower_case_lemma_words(relation, "Arg1", parse_dict)
    Arg2_words = dict_util._get_lower_case_lemma_words(relation, "Arg2", parse_dict)

    # Both arguments go through the same helper instead of two copies of
    # the accumulate-and-average loop.
    Arg1_vec = _average_word2vec(Arg1_words, dict_word2vec)
    Arg2_vec = _average_word2vec(Arg2_words, dict_word2vec)

    feat1 = get_feature_by_list(Arg1_vec)
    feat2 = get_feature_by_list(Arg2_vec)

    return util.mergeFeatures([feat1, feat2])