Esempio n. 1
0
def _trie_source_texts(professor):
    """Yield, in indexing order, every text of *professor* that gets indexed.

    ``name`` and ``title`` are always yielded; the optional scalar fields
    (``special_title``, ``introduction``, ``office``, ``phone``, ``email``)
    are yielded only when truthy.  For the e-mail address only the text
    captured in front of an ``@`` is yielded, not the domain.
    """
    yield professor.name
    yield professor.title
    if professor.special_title:
        yield professor.special_title
    if professor.introduction:
        yield professor.introduction
    for research_area in professor.research_areas:
        yield research_area
    for research_interest in professor.research_interests:
        yield research_interest
    for research_group in professor.research_groups:
        yield research_group
    if professor.office:
        yield professor.office
    if professor.phone:
        yield professor.phone
    if professor.email:
        yield "".join(re.findall(r'(.+)@', professor.email))


def buildTrie():
    """Build the search trie over all professors.

    Every text produced by ``_trie_source_texts`` is split into search
    words with ``formatSearchContent`` and each word is added to the trie
    together with the id of the professor it came from.

    Returns:
        The populated ``Trie`` instance.
    """
    trie = Trie()
    for professor in Professor.get_all_professors():
        prof_id = professor.get_id()
        for text in _trie_source_texts(professor):
            # "word" rather than "str": do not shadow the builtin.
            for word in formatSearchContent(text):
                trie.add(word, prof_id)
    return trie
Esempio n. 2
0
def search_professors(query_words):
    """Return the professors that match every word of a query, best first.

    The query is split into words with ``formatSearchContent``.  For each
    word, ``PROFESSOR_TRIE.searchSubstring(word)`` yields the indexed
    strings considered a hit, and ``PROFESSOR_TRIE.get(string)`` the ids of
    the professors carrying that string.  Only professors hit by *all*
    query words are returned.

    Relevance of a professor is the sum, over every (word, indexed string)
    hit, of ``len(word) / len(string)`` times the professor's entry for
    that string in ``OCCURRENCE_DICT``.

    Args:
        query_words: Raw query, handed to ``formatSearchContent``.

    Returns:
        A list of ``Professor.get_professor(id)`` results sorted by
        descending relevance; an empty list when the query contains no
        words or no professor matches all of them.
    """
    words = formatSearchContent(query_words)
    trie = PROFESSOR_TRIE
    relevance = {}
    ids_per_word = []
    for word in words:
        ids_for_word = set()
        for indexed in trie.searchSubstring(word):
            ids_for_indexed = trie.get(indexed)
            # Fraction of the indexed string covered by the query word;
            # float() keeps this a true division on Python 2 as well.
            coverage = float(len(word)) / len(indexed)
            for prof_id in ids_for_indexed:
                weight = coverage * OCCURRENCE_DICT[prof_id][indexed]
                relevance[prof_id] = relevance.get(prof_id, 0.0) + weight
            ids_for_word.update(ids_for_indexed)
        ids_per_word.append(ids_for_word)

    # An empty query produces no id sets; intersecting nothing would raise
    # TypeError, so answer with "no results" instead.
    if not ids_per_word:
        return []

    matching_ids = set.intersection(*ids_per_word)
    professors = [Professor.get_professor(prof_id) for prof_id in matching_ids]
    return sorted(professors, key=lambda prof: -relevance[prof.key.id()])
Esempio n. 3
0
def _occurrence_source_texts(professor):
    """Yield, in order, every text of *professor* whose words are counted.

    ``name`` and ``title`` are always yielded; the optional scalar fields
    (``special_title``, ``introduction``, ``office``, ``phone``, ``email``)
    are yielded only when truthy.  For the e-mail address only the text
    captured in front of an ``@`` is yielded, not the domain.
    """
    yield professor.name
    yield professor.title
    if professor.special_title:
        yield professor.special_title
    if professor.introduction:
        yield professor.introduction
    for research_area in professor.research_areas:
        yield research_area
    for research_interest in professor.research_interests:
        yield research_interest
    for research_group in professor.research_groups:
        yield research_group
    if professor.office:
        yield professor.office
    if professor.phone:
        yield professor.phone
    if professor.email:
        yield "".join(re.findall(r'(.+)@', professor.email))


def buildOccurrenceDict():
    """Count, per professor, how often each search word occurs.

    Every text produced by ``_occurrence_source_texts`` is split into
    search words with ``formatSearchContent`` and the words are tallied
    per professor.

    Returns:
        A dict mapping ``professor.get_id()`` to a dict of
        ``{word: number of occurrences}`` for that professor.
    """
    occurrences = dict()
    for professor in Professor.get_all_professors():
        counts = dict()
        for text in _occurrence_source_texts(professor):
            for word in formatSearchContent(text):
                # The first sighting counts as 1.  Starting at 0 made every
                # count one too low, so a word seen once had weight zero.
                counts[word] = counts.get(word, 0) + 1
        occurrences[professor.get_id()] = counts
    return occurrences