def _iter_trie_source_texts(professor):
    """Yield, in indexing order, every text field of ``professor`` to index.

    ``name`` and ``title`` are always yielded. ``special_title``,
    ``introduction``, ``office``, ``phone`` and ``email`` are skipped when
    falsy. Each entry of ``research_areas``, ``research_interests`` and
    ``research_groups`` is yielded individually. For the email, only the
    text captured by ``(.+)@`` (the part in front of the ``@``) is yielded.

    This is a generator so that attributes are read lazily, in the same
    order in which they are indexed.
    """
    yield professor.name
    yield professor.title
    if professor.special_title:
        yield professor.special_title
    if professor.introduction:
        yield professor.introduction
    for research_area in professor.research_areas:
        yield research_area
    for research_interest in professor.research_interests:
        yield research_interest
    for research_group in professor.research_groups:
        yield research_group
    if professor.office:
        yield professor.office
    if professor.phone:
        yield professor.phone
    if professor.email:
        # Index only the local part of the address, not the domain.
        yield "".join(re.findall(r'(.+)@', professor.email))


def buildTrie():
    """Build a search trie mapping tokens to the professors they occur in.

    Every professor returned by ``Professor.get_all_professors()`` has each
    of its searchable text fields passed through ``formatSearchContent``;
    each resulting token is added to the trie together with the professor's
    id (``professor.get_id()``).

    NOTE(review): ``buildOccurrenceDict`` walks the same set of fields; keep
    the two in sync when adding or removing a searchable field.

    Returns:
        The populated ``Trie`` instance.
    """
    trie = Trie()
    for professor in Professor.get_all_professors():
        # The id does not change between fields, so look it up once.
        prof_id = professor.get_id()
        for text in _iter_trie_source_texts(professor):
            for token in formatSearchContent(text):
                trie.add(token, prof_id)
    return trie
def search_professors(query_words):
    """Return the professors matching every query word, best match first.

    The raw query is tokenised with ``formatSearchContent``. For each query
    word, ``PROFESSOR_TRIE.searchSubstring(word)`` supplies the indexed
    strings to consider (presumably those containing ``word`` -- confirm
    against the Trie implementation), and ``PROFESSOR_TRIE.get(string)``
    supplies the ids of the professors associated with each string.

    A professor is part of the result only if it was reached by *every*
    query word. Each (word, string, professor) hit adds
    ``len(word) / len(string) * OCCURRENCE_DICT[prof_id][string]`` to that
    professor's relevance score, and results are sorted by descending score.

    Args:
        query_words: The raw search input, as accepted by
            ``formatSearchContent``.

    Returns:
        A list of the objects returned by ``Professor.get_professor``,
        ordered from most to least relevant. An empty list is returned when
        the query produces no search tokens.
    """
    trie = PROFESSOR_TRIE
    relevance = {}
    pids_per_word = []

    for word in formatSearchContent(query_words):
        pids_of_word = set()
        for string in trie.searchSubstring(word):
            pids_of_string = trie.get(string)
            # Fraction of the indexed string covered by the query word; it is
            # the same for every professor sharing this string.
            coverage = float(len(word)) / len(string)
            for prof_id in pids_of_string:
                relevance[prof_id] = (
                    relevance.get(prof_id, 0.0)
                    + coverage * OCCURRENCE_DICT[prof_id][string]
                )
            pids_of_word.update(pids_of_string)
        pids_per_word.append(pids_of_word)

    # A query with no tokens has nothing to intersect; intersecting an empty
    # list of sets would raise TypeError, so report "no results" instead.
    if not pids_per_word:
        return []

    matching_ids = set.intersection(*pids_per_word)
    ret = [Professor.get_professor(prof_id) for prof_id in matching_ids]
    # NOTE(review): scores are keyed by the ids stored in the trie
    # (``professor.get_id()`` at build time); this lookup assumes
    # ``x.key.id()`` yields the same value -- confirm.
    return sorted(ret, key=lambda x: -relevance[x.key.id()])
def _iter_occurrence_source_texts(professor):
    """Yield, in order, every text field of ``professor`` that is counted.

    ``name`` and ``title`` are always yielded. ``special_title``,
    ``introduction``, ``office``, ``phone`` and ``email`` are skipped when
    falsy. Each entry of ``research_areas``, ``research_interests`` and
    ``research_groups`` is yielded individually. For the email, only the
    text captured by ``(.+)@`` (the part in front of the ``@``) is yielded.
    """
    yield professor.name
    yield professor.title
    if professor.special_title:
        yield professor.special_title
    if professor.introduction:
        yield professor.introduction
    for research_area in professor.research_areas:
        yield research_area
    for research_interest in professor.research_interests:
        yield research_interest
    for research_group in professor.research_groups:
        yield research_group
    if professor.office:
        yield professor.office
    if professor.phone:
        yield professor.phone
    if professor.email:
        # Count only the local part of the address, not the domain.
        yield "".join(re.findall(r'(.+)@', professor.email))


def buildOccurrenceDict():
    """Count how often each search token occurs for every professor.

    Every professor returned by ``Professor.get_all_professors()`` has each
    of its searchable text fields passed through ``formatSearchContent``;
    the resulting tokens are tallied across all fields of that professor.

    NOTE(review): ``buildTrie`` walks the same set of fields; keep the two
    in sync when adding or removing a searchable field.

    Returns:
        A dict mapping ``professor.get_id()`` to a dict of
        ``{token: number of occurrences}``. A token seen once has count 1.
    """
    ret = dict()
    for professor in Professor.get_all_professors():
        prof_dict = dict()
        for text in _iter_occurrence_source_texts(professor):
            for token in formatSearchContent(text):
                # Start at 1 on first sight: search_professors multiplies its
                # relevance weight by this count, so recording a first
                # occurrence as 0 would make single-occurrence tokens
                # contribute nothing to the ranking.
                prof_dict[token] = prof_dict.get(token, 0) + 1
        ret[professor.get_id()] = prof_dict
    return ret