def test_keyword_analysis_methods(self, log_data, nicks, nick_same_list):
    """Compare the outputs of user.keywords against JSON fixtures on disk.

    The `unjson` helper appends the parsed fixture contents to the list it
    is given, so each expected value is read back out at index 0.
    NOTE(review): `keywords_for_channels` is unpacked but never asserted —
    presumably not covered by a fixture; confirm this is intentional.
    """
    update_expected_output_directory(log_data)
    (keywords_filtered, user_keyword_freq_dict, user_words_dict,
     nicks_for_stop_words, keywords_for_channels) = user.keywords(
        log_data, nicks, nick_same_list)

    expected_keywords_filtered = []
    expected_user_keyword_freq_dict = []
    expected_user_word_dict = []
    expected_nicks_for_stop_words = []
    unjson('keywords_filtered.json', expected_keywords_filtered)
    unjson('user_words_dict.json', expected_user_word_dict)
    unjson('user_keyword_freq_dict.json', expected_user_keyword_freq_dict)
    unjson('nicks_for_stop_words.json', expected_nicks_for_stop_words)

    self.assertListEqual(user_keyword_freq_dict,
                         expected_user_keyword_freq_dict[0], msg=None)
    self.assertListEqual(user_words_dict, expected_user_word_dict[0], msg=None)
    self.assertListEqual(keywords_filtered, expected_keywords_filtered[0])
    self.assertListEqual(nicks_for_stop_words, expected_nicks_for_stop_words[0])
def test_keywords(self):
    """End-to-end check of user.keywords against pickled fixtures on disk.

    Captures everything user.keywords prints to stdout and compares it,
    along with all five return values, to previously recorded outputs.
    """
    fixture_dir = self.current_directory + "/../../../data/user_test/"
    expected_keywords_filtered = util.load_from_disk(
        fixture_dir + "keywords/keywords_filtered")
    expected_user_keyword_freq_dict = util.load_from_disk(
        fixture_dir + "user_keyword_freq_dict")
    expected_user_words_dict = util.load_from_disk(
        fixture_dir + "keywords/user_words_dict")
    expected_nicks_for_stop_words = util.load_from_disk(
        fixture_dir + "keywords/nicks_for_stop_words")
    expected_sorted_keywords_for_channels = util.load_from_disk(
        fixture_dir + "keywords/sorted_keywords_for_channels")
    # NOTE(review): this fixture lives under a different root than the
    # others ("/data/..." instead of "/../../../data/...") — confirm the
    # asymmetry is intentional.
    expected_captured_output = util.load_from_disk(
        self.current_directory + "/data/user_test/keywords/stdout_captured_output")

    captured_output = StringIO.StringIO()
    sys.stdout = captured_output
    try:
        # FIX: restore stdout even if user.keywords raises; previously an
        # exception here left stdout redirected for every subsequent test.
        keywords_filtered, user_keyword_freq_dict, user_words_dict, \
            nicks_for_stop_words, sorted_keywords_for_channels = user.keywords(
                self.log_data, self.nicks, self.nick_same_list)
    finally:
        sys.stdout = sys.__stdout__
    output = captured_output.getvalue()
    captured_output.close()

    self.assertEqual(expected_captured_output, output)
    self.assertEqual(expected_keywords_filtered, keywords_filtered)
    self.assertEqual(expected_user_keyword_freq_dict, user_keyword_freq_dict)
    self.assertEqual(expected_user_words_dict, user_words_dict)
    self.assertEqual(expected_nicks_for_stop_words, nicks_for_stop_words)
    self.assertEqual(expected_sorted_keywords_for_channels,
                     sorted_keywords_for_channels)
def test_keywords(self, mock_top_keywords_for_nick, mock_extended_stop_words, mock_rec_list_splice, mock_correct_nick_for_, mock_splice_find, mock_correct_last_char_list, mock_correctLastCharCR, mock_check_if_msg_line, mock_get_nick_representative):
    """Mocked check of user.keywords: all helpers return recorded values.

    Each helper mock replays a pickled list of recorded return values via
    side_effect (extended_stop_words uses a single return_value), then the
    outputs and captured stdout are compared to recorded fixtures.
    """
    mock_dir = self.current_directory + "/data/user_test/"
    mock_get_nick_representative.side_effect = util.load_from_disk(
        mock_dir + "get_nick_representative_list")
    mock_check_if_msg_line.side_effect = util.load_from_disk(
        mock_dir + "check_if_msg_line_list")
    mock_correctLastCharCR.side_effect = util.load_from_disk(
        mock_dir + "correctLastCharCR_list")
    mock_correct_last_char_list.side_effect = util.load_from_disk(
        mock_dir + "correct_last_char_list_list")
    mock_splice_find.side_effect = util.load_from_disk(
        mock_dir + "keywords/splice_find_list")
    mock_correct_nick_for_.side_effect = util.load_from_disk(
        mock_dir + "correct_nick_for_list")
    mock_rec_list_splice.side_effect = util.load_from_disk(
        mock_dir + "rec_list_splice_list")
    mock_extended_stop_words.return_value = util.load_from_disk(
        mock_dir + "keywords/extended_stop_words")
    mock_top_keywords_for_nick.side_effect = util.load_from_disk(
        mock_dir + "keywords/top_keywords_for_nick")

    fixture_dir = self.current_directory + "/../../../data/user_test/"
    expected_keywords_filtered = util.load_from_disk(
        fixture_dir + "keywords/keywords_filtered")
    expected_user_keyword_freq_dict = util.load_from_disk(
        fixture_dir + "user_keyword_freq_dict")
    expected_user_words_dict = util.load_from_disk(
        fixture_dir + "keywords/user_words_dict")
    expected_nicks_for_stop_words = util.load_from_disk(
        fixture_dir + "keywords/nicks_for_stop_words")
    expected_sorted_keywords_for_channels = util.load_from_disk(
        fixture_dir + "keywords/sorted_keywords_for_channels")
    expected_captured_output = util.load_from_disk(
        mock_dir + "keywords/stdout_captured_output")

    captured_output = StringIO.StringIO()
    sys.stdout = captured_output
    try:
        # FIX: restore stdout even if user.keywords raises; previously an
        # exception here left stdout redirected for every subsequent test.
        keywords_filtered, user_keyword_freq_dict, user_words_dict, \
            nicks_for_stop_words, sorted_keywords_for_channels = user.keywords(
                self.log_data, self.nicks, self.nick_same_list)
    finally:
        sys.stdout = sys.__stdout__
    output = captured_output.getvalue()
    captured_output.close()

    self.assertEqual(expected_captured_output, output)
    self.assertEqual(expected_keywords_filtered, keywords_filtered)
    self.assertEqual(expected_user_keyword_freq_dict, user_keyword_freq_dict)
    self.assertEqual(expected_user_words_dict, user_words_dict)
    self.assertEqual(expected_nicks_for_stop_words, nicks_for_stop_words)
    self.assertEqual(expected_sorted_keywords_for_channels,
                     sorted_keywords_for_channels)
def identify_hubs_and_experts(log_dict, nicks, nick_same_list): """ uses message_number graph to identify hubs and experts in the network Args: log_dict (dict): with key as dateTime.date object and value as {"data":datalist,"channel_name":channels name} nicks(list): list of all the nicks nick_same_list(list): list of lists mentioning nicks which belong to same users """ message_graph = message_number_graph(log_dict, nicks, nick_same_list) hubs, authority_values = nx.hits(message_graph) keyword_dict_list, user_keyword_freq_dict, user_words_dict_list, nicks_for_stop_words, keywords_for_channels = user.keywords(log_dict, nicks, nick_same_list) if config.DEBUGGER: print "========> USERS" print user_keyword_freq_dict print "========> CHANNELS" print keywords_for_channels, len(keywords_for_channels) top_keywords_for_channels = [] for word_tuple in keywords_for_channels[:config.NUMBER_OF_KEYWORDS_CHANNEL_FOR_OVERLAP]: top_keywords_for_channels.append(word_tuple[0]) overlap_word_number = [] for keyword_tuple in user_keyword_freq_dict: keywords_for_user = keyword_tuple['keywords'] username = keyword_tuple['nick'] overlapping_keywords = list(set(top_keywords_for_channels).intersection([x[0] for x in keywords_for_user])) if len(overlapping_keywords) > 0: overlap_word_number.append([username, len(overlapping_keywords)]) top_hubs_with_score = util.find_top_n_element_after_sorting(hubs.items(), 1, True, config.HOW_MANY_TOP_EXPERTS) top_auth_with_score = util.find_top_n_element_after_sorting(authority_values.items(), 1, True, config.HOW_MANY_TOP_EXPERTS) top_keyword_overlap_with_score = util.find_top_n_element_after_sorting(overlap_word_number, 1, True, config.HOW_MANY_TOP_EXPERTS) print "TOP " + str(config.HOW_MANY_TOP_EXPERTS) + " HUBS\n", top_hubs_with_score print "TOP " + str(config.HOW_MANY_TOP_EXPERTS) + " AUTH\n", top_auth_with_score print "TOP " + str(config.HOW_MANY_TOP_EXPERTS) + " KEYWORD OVERLAP\n", top_keyword_overlap_with_score top_hub = [hub_tuple[0] for hub_tuple 
in top_hubs_with_score] top_auth = [auth_tuple[0] for auth_tuple in top_auth_with_score] top_keyword_overlap = [key_overlap_tuple[0] for key_overlap_tuple in top_keyword_overlap_with_score] for node_name in message_graph: # mark EXPERTS message_graph.node[node_name]['style'] = 'filled' if node_name in top_auth and node_name in top_keyword_overlap: message_graph.node[node_name]['color'] = '#ff000' elif node_name in top_auth: message_graph.node[node_name]['color'] = '#00ff00' elif node_name in top_keyword_overlap: message_graph.node[node_name]['color'] = '#0000ff' else: message_graph.node[node_name]['color'] = '#cccccc' # mark HUBS if node_name in top_hub: message_graph.node[node_name]['shape'] = 'square' return message_graph, top_hub, top_keyword_overlap, top_auth