# NOTE(review): a second definition with the same name follows later in this
# file and replaces this one at import time; consider keeping only one.
def get_noun_combinations(map_to_head, term_map):
    """Combine heads with their children when both carry a noun-like tag.

    A tag counts as noun-like here when ``is_noun`` accepts it or when it is
    one of 'D', '@', 'A', 'R' (presumably ARK Twitter POS tags -- confirm).

    Args:
        map_to_head: mapping of head term id -> collection of child term ids.
        term_map: mapping of term id -> term object exposing ``postag`` and
            ``id`` attributes.

    Returns:
        Whatever ``get_combinations`` returns for the collected list of
        ``{child.id, head.id}`` sets.
    """
    # Tags accepted in addition to whatever is_noun() recognises.
    extra_tags = ('D', '@', 'A', 'R')

    def _is_combinable(postag):
        # Single place for the test applied to both the head and its children.
        return is_noun(postag) or postag in extra_tags

    to_combine = []
    # .items() rather than the Python-2-only .iteritems(), so the function
    # runs unchanged on both Python 2 and Python 3.
    for head_id, children in map_to_head.items():
        head = term_map[head_id]
        # Heads without children, or with a non-noun-like tag, yield nothing.
        if not children or not _is_combinable(head.postag):
            continue
        for child_id in children:
            child = term_map[child_id]
            if _is_combinable(child.postag):
                to_combine.append({child.id, head.id})
    return get_combinations(to_combine)
# NOTE(review): this redefines a function of the same name declared earlier
# in this file; the earlier definition is shadowed by this one.
def get_noun_combinations(map_to_head, term_map):
    """Combine heads with their children when both carry a noun-like tag.

    A tag counts as noun-like here when ``is_noun`` accepts it or when it is
    one of 'D', '@', 'A', 'R' (presumably ARK Twitter POS tags -- confirm).

    Args:
        map_to_head: mapping of head term id -> collection of child term ids.
        term_map: mapping of term id -> term object exposing ``postag`` and
            ``id`` attributes.

    Returns:
        Whatever ``get_combinations`` returns for the collected list of
        ``{child.id, head.id}`` sets.
    """
    # Tags accepted in addition to whatever is_noun() recognises.
    extra_tags = ('D', '@', 'A', 'R')

    def _is_combinable(postag):
        # Single place for the test applied to both the head and its children.
        return is_noun(postag) or postag in extra_tags

    to_combine = []
    # .items() rather than the Python-2-only .iteritems(), so the function
    # runs unchanged on both Python 2 and Python 3.
    for head_id, children in map_to_head.items():
        head = term_map[head_id]
        # Heads without children, or with a non-noun-like tag, yield nothing.
        if not children or not _is_combinable(head.postag):
            continue
        for child_id in children:
            child = term_map[child_id]
            if _is_combinable(child.postag):
                to_combine.append({child.id, head.id})
    return get_combinations(to_combine)
# NOTE(review): a second definition with the same name follows later in this
# file and replaces this one at import time; consider keeping only one.
def get_entities_from_parse(term_map):
    """Extract entity strings (and proper-noun strings) from parsed terms.

    A term is considered when ``is_noun`` accepts its ``postag`` or the tag
    is exactly '@' or '#'. Its text is split on whitespace; tokens are kept
    verbatim (no punctuation stripping or stopword filtering).

    Args:
        term_map: mapping of term id -> term object exposing ``postag``,
            ``text`` and ``all_original_ids`` attributes.

    Returns:
        A 4-tuple ``(all_entities, all_proper, all_entities_original_ids,
        all_proper_original_ids)``:
          * all_entities: lower-cased, space-joined tokens of each kept term.
          * all_proper: space-joined tokens (original case) of kept terms
            whose tag contains '^' and whose text starts with an upper-case
            character.
          * all_entities_original_ids: for each entity, the sorted ids taken
            from ``all_original_ids`` at the token positions.
          * all_proper_original_ids: for each proper entry, the sorted full
            ``all_original_ids`` of the term.

    Raises:
        IndexError: if a term has more whitespace-separated tokens than
            entries in ``all_original_ids`` (assuming it is a sequence).
    """
    all_proper = []
    all_entities = []
    all_entities_original_ids = []
    all_proper_original_ids = []

    # Only the terms are needed; .values() also avoids the Python-2-only
    # .iteritems() so this runs on both Python 2 and Python 3.
    for term in term_map.values():
        postag = term.postag
        if not (is_noun(postag) or postag == '@' or postag == '#'):
            continue

        tokens = term.text.split()
        # Explicit indexing (not slicing) so a too-short id list still fails
        # loudly instead of being silently truncated.
        ent_ids = [term.all_original_ids[pos] for pos, _ in enumerate(tokens)]

        if not tokens or postag == 'O':
            continue

        if '^' in postag and term.text[0].isupper():
            all_proper.append(" ".join(tokens))
            # Proper entries record every original id of the term, not just
            # those at the token positions.
            all_proper_original_ids.append(sorted(term.all_original_ids))

        all_entities.append(" ".join([tok.lower() for tok in tokens]))
        all_entities_original_ids.append(sorted(ent_ids))

    return (all_entities, all_proper,
            all_entities_original_ids, all_proper_original_ids)
# NOTE(review): this redefines a function of the same name declared earlier
# in this file; the earlier definition is shadowed by this one.
def get_entities_from_parse(term_map):
    """Extract entity strings (and proper-noun strings) from parsed terms.

    A term is considered when ``is_noun`` accepts its ``postag`` or the tag
    is exactly '@' or '#'. Its text is split on whitespace; tokens are kept
    verbatim (no punctuation stripping or stopword filtering).

    Args:
        term_map: mapping of term id -> term object exposing ``postag``,
            ``text`` and ``all_original_ids`` attributes.

    Returns:
        A 4-tuple ``(all_entities, all_proper, all_entities_original_ids,
        all_proper_original_ids)``:
          * all_entities: lower-cased, space-joined tokens of each kept term.
          * all_proper: space-joined tokens (original case) of kept terms
            whose tag contains '^' and whose text starts with an upper-case
            character.
          * all_entities_original_ids: for each entity, the sorted ids taken
            from ``all_original_ids`` at the token positions.
          * all_proper_original_ids: for each proper entry, the sorted full
            ``all_original_ids`` of the term.

    Raises:
        IndexError: if a term has more whitespace-separated tokens than
            entries in ``all_original_ids`` (assuming it is a sequence).
    """
    all_proper = []
    all_entities = []
    all_entities_original_ids = []
    all_proper_original_ids = []

    # Only the terms are needed; .values() also avoids the Python-2-only
    # .iteritems() so this runs on both Python 2 and Python 3.
    for term in term_map.values():
        postag = term.postag
        if not (is_noun(postag) or postag == '@' or postag == '#'):
            continue

        tokens = term.text.split()
        # Explicit indexing (not slicing) so a too-short id list still fails
        # loudly instead of being silently truncated.
        ent_ids = [term.all_original_ids[pos] for pos, _ in enumerate(tokens)]

        if not tokens or postag == 'O':
            continue

        if '^' in postag and term.text[0].isupper():
            all_proper.append(" ".join(tokens))
            # Proper entries record every original id of the term, not just
            # those at the token positions.
            all_proper_original_ids.append(sorted(term.all_original_ids))

        all_entities.append(" ".join([tok.lower() for tok in tokens]))
        all_entities_original_ids.append(sorted(ent_ids))

    return (all_entities, all_proper,
            all_entities_original_ids, all_proper_original_ids)
def get_people_combinations(map_to_head, term_map):
    """Combine people-term heads with their noun or 'A'-tagged children.

    A head qualifies when it has at least one child and its lower-cased text
    is a member of ``PEOPLE_TERMS_SET``. Each child whose ``postag`` is
    accepted by ``is_noun`` or equals 'A' contributes the set
    ``{child.id, head.id}``.

    Args:
        map_to_head: mapping of head term id -> collection of child term ids.
        term_map: mapping of term id -> term object exposing ``text``,
            ``postag`` and ``id`` attributes.

    Returns:
        Whatever ``get_combinations`` returns for the collected list of
        id sets.
    """
    to_combine = []
    # .items() rather than the Python-2-only .iteritems(), so the function
    # runs unchanged on both Python 2 and Python 3.
    for head_id, children in map_to_head.items():
        head = term_map[head_id]
        # Skip childless heads and heads that are not people terms.
        if not children or head.text.lower() not in PEOPLE_TERMS_SET:
            continue
        for child_id in children:
            child = term_map[child_id]
            if is_noun(child.postag) or child.postag == 'A':
                to_combine.append({child.id, head.id})
    return get_combinations(to_combine)