def get_noun_combinations(map_to_head, term_map):
    """Pair noun-like heads with their noun-like children.

    A term counts as "noun-like" here when ``is_noun`` accepts its POS tag or
    the tag is one of 'D', '@', 'A', 'R'.
    NOTE(review): the tags look like the ARK Twitter POS tagset (determiner,
    at-mention, adjective, adverb) -- confirm against the tagger in use.

    Args:
        map_to_head: mapping of head term id -> collection of child term ids.
        term_map: mapping of term id -> term object exposing ``id`` and
            ``postag``.

    Returns:
        The result of ``get_combinations`` applied to the list of
        ``{child.id, head.id}`` sets collected from every qualifying
        head/child pair.
    """
    combinable_tags = ('D', '@', 'A', 'R')

    def is_combinable(term):
        # Same test is applied to heads and to children.
        return is_noun(term.postag) or term.postag in combinable_tags

    to_combine = []
    for head_id, children in map_to_head.iteritems():
        head = term_map[head_id]
        # Skip heads with nothing attached, and heads that are not noun-like.
        if len(children) == 0 or not is_combinable(head):
            continue

        for child_id in children:
            child = term_map[child_id]
            if is_combinable(child):
                to_combine.append({child.id, head.id})

    return get_combinations(to_combine)
def get_entities_from_parse(term_map):
    """Collect entity strings and their original token ids from parsed terms.

    Only terms whose POS tag is accepted by ``is_noun`` or equals '@' or '#'
    are considered. Each such term's text is split on whitespace.

    Args:
        term_map: mapping of term id -> term object exposing ``postag``,
            ``text`` and ``all_original_ids``.

    Returns:
        A 4-tuple of lists
        ``(all_entities, all_proper, all_entities_original_ids,
        all_proper_original_ids)``:

        * ``all_entities`` -- the term's tokens, lowercased and joined with
          single spaces; one entry per accepted term.
        * ``all_proper`` -- the tokens joined with their original casing, only
          for terms whose tag contains '^' and whose text starts with an
          uppercase character.
        * ``all_entities_original_ids`` -- parallel to ``all_entities``; the
          sorted ids taken positionally, one per token.
        * ``all_proper_original_ids`` -- parallel to ``all_proper``; the
          sorted full ``all_original_ids`` of the term.
    """
    all_proper = []
    all_entities = []
    all_entities_original_ids = []
    all_proper_original_ids = []

    for _term_id, term in term_map.iteritems():
        if not (is_noun(term.postag) or term.postag in ('@', '#')):
            continue

        tokens = term.text.split()
        # The i-th whitespace token is paired with the i-th original id.
        ent_ids = [term.all_original_ids[i] for i in range(len(tokens))]

        # The 'O' guard can only trigger if is_noun() accepts 'O'; it is kept
        # so the behaviour does not depend on that helper's definition.
        if not tokens or term.postag == 'O':
            continue

        if '^' in term.postag and term.text[0].isupper():
            all_proper.append(" ".join(tokens))
            all_proper_original_ids.append(sorted(term.all_original_ids))

        all_entities.append(" ".join(t.lower() for t in tokens))
        all_entities_original_ids.append(sorted(ent_ids))

    return (all_entities, all_proper, all_entities_original_ids,
            all_proper_original_ids)
def get_people_combinations(map_to_head, term_map):
    """Pair people-term heads with their noun or 'A'-tagged children.

    For every entry of ``map_to_head`` whose head term has at least one child
    and whose lowercased text is in ``PEOPLE_TERMS_SET``, each child that
    ``is_noun`` accepts, or whose tag is 'A', contributes the set
    ``{child.id, head.id}``.

    Args:
        map_to_head: mapping of head term id -> collection of child term ids.
        term_map: mapping of term id -> term object exposing ``id``, ``text``
            and ``postag``.

    Returns:
        The result of ``get_combinations`` applied to the collected id sets.
    """
    pairs = []
    for parent_key, child_keys in map_to_head.iteritems():
        parent = term_map[parent_key]
        # Childless heads and heads that are not people terms add nothing.
        if len(child_keys) != 0 and parent.text.lower() in PEOPLE_TERMS_SET:
            dependents = (term_map[key] for key in child_keys)
            pairs.extend(
                {dep.id, parent.id}
                for dep in dependents
                if is_noun(dep.postag) or dep.postag == 'A'
            )
    return get_combinations(pairs)